#!/usr/bin/env lua
--[[--------------------------------------------------------------------

  LuaSrcDiet
  Compresses Lua source code by removing unnecessary characters.
  For Lua 5.1.x source code.

  Copyright (c) 2008,2011,2012 Kein-Hong Man <keinhong@gmail.com>
  The COPYRIGHT file describes the conditions
  under which this software may be distributed.

----------------------------------------------------------------------]]

--[[--------------------------------------------------------------------
-- NOTES:
-- * Remember to update version and date information below (MSG_TITLE)
-- * TODO: passing data tables around is a horrific mess
-- * TODO: implement pcall() to properly handle lexer and other errors
-- * TODO: need some automatic testing for a semblance of sanity
-- * TODO: the plugin module is highly experimental and unstable
----------------------------------------------------------------------]]

-- standard libraries, functions
local string=string
local math=math
local table=table
local require=require
local print=print
local sub=string.sub
local gmatch=string.gmatch
local match=string.match

-- modules incorporated as preload functions follow
local preload=package.preload
local base=_G

local plugin_info={
html="html    generates a HTML file for checking globals",
sloc="sloc    calculates SLOC for given source file",
}

local p_embedded={
'html',
'sloc',
}

-- preload function for module llex
preload.llex=
function()
--start of inserted module
module"llex"

local string=base.require"string"
local find=string.find
local match=string.match
local sub=string.sub

----------------------------------------------------------------------
-- initialize keyword list, variables
----------------------------------------------------------------------

local kw={}
for v in string.gmatch([[
and break do else elseif end false for function if in
local nil not or repeat return then true until while]],"%S+")do
kw[v]=true
end

-- see init() for module variables (externally visible):
--       tok, seminfo, tokln

local z,-- source stream
sourceid,-- name of source
I,-- position of lexer
buff,-- buffer for strings
ln-- line number

----------------------------------------------------------------------
-- add information to token listing
----------------------------------------------------------------------

local function addtoken(token,info)
local i=#tok+1
tok[i]=token
seminfo[i]=info
tokln[i]=ln
end

----------------------------------------------------------------------
-- handles line number incrementation and end-of-line characters
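-- * e.g. a "\r\n" or "\n\r" pair is consumed as one end-of-line, so
--   sources with mixed EOL styles still get correct line numbers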
----------------------------------------------------------------------

local function inclinenumber(i,is_tok)
local sub=sub
local old=sub(z,i,i)
i=i+1-- skip '\n' or '\r'
local c=sub(z,i,i)
if(c=="\n"or c=="\r")and(c~=old)then
i=i+1-- skip '\n\r' or '\r\n'
old=old..c
end
if is_tok then addtoken("TK_EOL",old)end
ln=ln+1
I=i
return i
end

----------------------------------------------------------------------
-- initialize lexer for given source _z and source name _sourceid
----------------------------------------------------------------------

function init(_z,_sourceid)
z=_z-- source
sourceid=_sourceid-- name of source
I=1-- lexer's position in source
ln=1-- line number
tok={}-- lexed token list*
seminfo={}-- lexed semantic information list*
tokln={}-- line numbers for messages*
-- (*) externally visible thru' module
--------------------------------------------------------------------
-- initial processing (shbang handling)
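-- e.g. a leading "#!/usr/bin/env lua" line becomes one TK_COMMENT
-- token, and its end-of-line then becomes a TK_EOL token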
--------------------------------------------------------------------
local p,_,q,r=find(z,"^(#[^\r\n]*)(\r?\n?)")
if p then-- skip first line
I=I+#q
addtoken("TK_COMMENT",q)
if#r>0 then inclinenumber(I,true)end
end
end

----------------------------------------------------------------------
-- returns a chunk name or id, no truncation for long names
----------------------------------------------------------------------

function chunkid()
if sourceid and match(sourceid,"^[=@]")then
return sub(sourceid,2)-- remove first char
end
return"[string]"
end

----------------------------------------------------------------------
-- formats error message and throws error
-- * a simplified version, does not report what token was responsible
----------------------------------------------------------------------

function errorline(s,line)
local e=error or base.error
e(string.format("%s:%d: %s",chunkid(),line or ln,s))
end
local errorline=errorline

------------------------------------------------------------------------
-- count separators ("=") in a long string delimiter
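-- * e.g. returns 2 at "[==[" and 0 at "[["; a lone "[" gives -1 and
--   "[=" not followed by "[" gives -2 (not a long bracket level)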
------------------------------------------------------------------------

local function skip_sep(i)
local sub=sub
local s=sub(z,i,i)
i=i+1
local count=#match(z,"=*",i)
i=i+count
I=i
return(sub(z,i,i)==s)and count or(-count)-1
end

----------------------------------------------------------------------
-- reads a long string or long comment
----------------------------------------------------------------------

local function read_long_string(is_str,sep)
local i=I+1-- skip 2nd '['
local sub=sub
local c=sub(z,i,i)
if c=="\r"or c=="\n"then-- string starts with a newline?
i=inclinenumber(i)-- skip it
end
while true do
local p,q,r=find(z,"([\r\n%]])",i)-- (long range match)
if not p then
errorline(is_str and"unfinished long string"or
"unfinished long comment")
end
i=p
if r=="]"then-- delimiter test
if skip_sep(i)==sep then
buff=sub(z,buff,I)
I=I+1-- skip 2nd ']'
return buff
end
i=I
else-- newline
buff=buff.."\n"
i=inclinenumber(i)
end
end--while
end

----------------------------------------------------------------------
-- reads a string
----------------------------------------------------------------------

local function read_string(del)
local i=I
local find=find
local sub=sub
while true do
local p,q,r=find(z,"([\n\r\\\"\'])",i)-- (long range match)
if p then
if r=="\n"or r=="\r"then
errorline("unfinished string")
end
i=p
if r=="\\"then-- handle escapes
i=i+1
r=sub(z,i,i)
if r==""then break end-- (EOZ error)
p=find("abfnrtv\n\r",r,1,true)
------------------------------------------------------
if p then-- special escapes
if p>7 then
i=inclinenumber(i)
else
i=i+1
end
------------------------------------------------------
elseif find(r,"%D")then-- other non-digits
i=i+1
------------------------------------------------------
else-- \xxx sequence
local p,q,s=find(z,"^(%d%d?%d?)",i)
i=q+1
if s+1>256 then-- UCHAR_MAX
errorline("escape sequence too large")
end
------------------------------------------------------
end--if p
else
i=i+1
if r==del then-- ending delimiter
I=i
return sub(z,buff,i-1)-- return string
end
end--if r
else
break-- (error)
end--if p
end--while
errorline("unfinished string")
end

------------------------------------------------------------------------
-- main lexer function
------------------------------------------------------------------------

function llex()
local find=find
local match=match
while true do--outer
local i=I
-- inner loop allows break to be used to nicely section tests
while true do--inner
----------------------------------------------------------------
local p,_,r=find(z,"^([_%a][_%w]*)",i)
if p then
I=i+#r
if kw[r]then
addtoken("TK_KEYWORD",r)-- reserved word (keyword)
else
addtoken("TK_NAME",r)-- identifier
end
break-- (continue)
end
----------------------------------------------------------------
local p,_,r=find(z,"^(%.?)%d",i)
if p then-- numeral
if r=="."then i=i+1 end
local _,q,r=find(z,"^%d*[%.%d]*([eE]?)",i)
i=q+1
if#r==1 then-- optional exponent
if match(z,"^[%+%-]",i)then-- optional sign
i=i+1
end
end
local _,q=find(z,"^[_%w]*",i)
I=q+1
local v=sub(z,p,q)-- string equivalent
if not base.tonumber(v)then-- handles hex test also
errorline("malformed number")
end
addtoken("TK_NUMBER",v)
break-- (continue)
end
----------------------------------------------------------------
local p,q,r,t=find(z,"^((%s)[ \t\v\f]*)",i)
if p then
if t=="\n"or t=="\r"then-- newline
inclinenumber(i,true)
else
I=q+1-- whitespace
addtoken("TK_SPACE",r)
end
break-- (continue)
end
----------------------------------------------------------------
local r=match(z,"^%p",i)
if r then
buff=i
local p=find("-[\"\'.=<>~",r,1,true)
if p then
-- two-level if block for punctuation/symbols
--------------------------------------------------------
if p<=2 then
if p==1 then-- minus
local c=match(z,"^%-%-(%[?)",i)
if c then
i=i+2
local sep=-1
if c=="["then
sep=skip_sep(i)
end
if sep>=0 then-- long comment
addtoken("TK_LCOMMENT",read_long_string(false,sep))
else-- short comment
I=find(z,"[\n\r]",i)or(#z+1)
addtoken("TK_COMMENT",sub(z,buff,I-1))
end
break-- (continue)
end
-- (fall through for "-")
else-- [ or long string
local sep=skip_sep(i)
if sep>=0 then
addtoken("TK_LSTRING",read_long_string(true,sep))
elseif sep==-1 then
addtoken("TK_OP","[")
else
errorline("invalid long string delimiter")
end
break-- (continue)
end
--------------------------------------------------------
elseif p<=5 then
if p<5 then-- strings
I=i+1
addtoken("TK_STRING",read_string(r))
break-- (continue)
end
r=match(z,"^%.%.?%.?",i)-- .|..|... dots
-- (fall through)
--------------------------------------------------------
else-- relational
r=match(z,"^%p=?",i)
-- (fall through)
end
end
I=i+#r
addtoken("TK_OP",r)-- for other symbols, fall through
break-- (continue)
end
----------------------------------------------------------------
local r=sub(z,i,i)
if r~=""then
I=i+1
addtoken("TK_OP",r)-- other single-char tokens
break
end
addtoken("TK_EOS","")-- end of stream,
return-- exit here
----------------------------------------------------------------
end--while inner
end--while outer
end
--end of inserted module
end

-- preload function for module lparser
preload.lparser=
function()
--start of inserted module
module"lparser"

local string=base.require"string"

--[[--------------------------------------------------------------------
-- variable and data structure initialization
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- initialization: main variables
----------------------------------------------------------------------

local toklist,-- grammar-only token tables (token table,
seminfolist,-- semantic information table, line number
toklnlist,-- table, cross-reference table)
xreflist,
tpos,-- token position

line,-- start line # for error messages
lastln,-- last line # for ambiguous syntax chk
tok,seminfo,ln,xref,-- token, semantic info, line
nameref,-- proper position of <name> token
fs,-- current function state
top_fs,-- top-level function state

globalinfo,-- global variable information table
globallookup,-- global variable name lookup table
localinfo,-- local variable information table
ilocalinfo,-- inactive locals (prior to activation)
ilocalrefs,-- corresponding references to activate
statinfo-- statements labeled by type

-- forward references for local functions
local explist1,expr,block,exp1,body,chunk

----------------------------------------------------------------------
-- initialization: data structures
----------------------------------------------------------------------

local gmatch=string.gmatch

local block_follow={}-- lookahead check in chunk(), returnstat()
for v in gmatch("else elseif end until <eof>","%S+")do
block_follow[v]=true
end

local binopr_left={}-- binary operators, left priority
local binopr_right={}-- binary operators, right priority
for op,lt,rt in gmatch([[
{+ 6 6}{- 6 6}{* 7 7}{/ 7 7}{% 7 7}
{^ 10 9}{.. 5 4}
{~= 3 3}{== 3 3}
{< 3 3}{<= 3 3}{> 3 3}{>= 3 3}
{and 2 2}{or 1 1}
]],"{(%S+)%s(%d+)%s(%d+)}")do
binopr_left[op]=lt+0
binopr_right[op]=rt+0
end

local unopr={["not"]=true,["-"]=true,
["#"]=true,}-- unary operators
local UNARY_PRIORITY=8-- priority for unary operators

--[[--------------------------------------------------------------------
-- support functions
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- formats error message and throws error (duplicated from llex)
-- * a simplified version, does not report what token was responsible
----------------------------------------------------------------------

local function errorline(s,line)
local e=error or base.error
e(string.format("(source):%d: %s",line or ln,s))
end

----------------------------------------------------------------------
-- handles incoming token, semantic information pairs
-- * NOTE: 'nextt' is named 'next' originally
----------------------------------------------------------------------

-- reads in next token
local function nextt()
lastln=toklnlist[tpos]
tok,seminfo,ln,xref
=toklist[tpos],seminfolist[tpos],toklnlist[tpos],xreflist[tpos]
tpos=tpos+1
end

-- peek at next token (single lookahead for table constructor)
local function lookahead()
return toklist[tpos]
end

----------------------------------------------------------------------
-- throws a syntax error, e.g. when an expected token is not there
----------------------------------------------------------------------

local function syntaxerror(msg)
local tok=tok
if tok~="<number>"and tok~="<string>"then
if tok=="<name>"then tok=seminfo end
tok="'"..tok.."'"
end
errorline(msg.." near "..tok)
end

local function error_expected(token)
syntaxerror("'"..token.."' expected")
end

----------------------------------------------------------------------
-- tests for a token, returns outcome
-- * return value changed to boolean
----------------------------------------------------------------------

local function testnext(c)
if tok==c then nextt();return true end
end

----------------------------------------------------------------------
-- check for existence of a token, throws error if not found
----------------------------------------------------------------------

local function check(c)
if tok~=c then error_expected(c)end
end

----------------------------------------------------------------------
-- verify existence of a token, then skip it
----------------------------------------------------------------------

local function checknext(c)
check(c);nextt()
end

----------------------------------------------------------------------
-- throws error if condition not matched
----------------------------------------------------------------------

local function check_condition(c,msg)
if not c then syntaxerror(msg)end
end

----------------------------------------------------------------------
-- verifies token conditions are met or else throw error
----------------------------------------------------------------------

local function check_match(what,who,where)
if not testnext(what)then
if where==ln then
error_expected(what)
else
syntaxerror("'"..what.."' expected (to close '"..who.."' at line "..where..")")
end
end
end

----------------------------------------------------------------------
-- expect that token is a name, return the name
----------------------------------------------------------------------

local function str_checkname()
check("<name>")
local ts=seminfo
nameref=xref
nextt()
return ts
end

----------------------------------------------------------------------
-- adds given string s in string pool, sets e as VK
----------------------------------------------------------------------

local function codestring(e,s)
e.k="VK"
end

----------------------------------------------------------------------
-- consume a name token, adds it to string pool
----------------------------------------------------------------------

local function checkname(e)
codestring(e,str_checkname())
end

--[[--------------------------------------------------------------------
-- variable (global|local|upvalue) handling
-- * to track locals and globals, variable management code is needed
-- * entry point is singlevar() for variable lookups
-- * lookup tables (bl.locallist) are maintained awkwardly in the basic
--   block data structures, PLUS the function data structure (this is
--   an inelegant hack, since bl is nil for the top level of a function)
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- register a local variable, create local variable object, set in
-- to-activate variable list
-- * used in new_localvarliteral(), parlist(), fornum(), forlist(),
--   localfunc(), localstat()
----------------------------------------------------------------------

local function new_localvar(name,special)
local bl=fs.bl
local locallist
-- locate locallist in current block object or function root object
if bl then
locallist=bl.locallist
else
locallist=fs.locallist
end
-- build local variable information object and set localinfo
local id=#localinfo+1
localinfo[id]={-- new local variable object
name=name,-- local variable name
xref={nameref},-- xref, first value is declaration
decl=nameref,-- location of declaration, = xref[1]
}
if special then-- "self" must not be changed
localinfo[id].isself=true
end
-- this can override a local with the same name in the same scope
-- but first, keep it inactive until it gets activated
local i=#ilocalinfo+1
ilocalinfo[i]=id
ilocalrefs[i]=locallist
end

----------------------------------------------------------------------
-- actually activate the variables so that they are visible
-- * remember Lua semantics, e.g. RHS is evaluated first, then LHS
-- * used in parlist(), forbody(), localfunc(), localstat(), body()
----------------------------------------------------------------------

local function adjustlocalvars(nvars)
local sz=#ilocalinfo
-- i goes from left to right, in order of local allocation, because
-- of something like: local a,a,a = 1,2,3 which gives a = 3
while nvars>0 do
nvars=nvars-1
local i=sz-nvars
local id=ilocalinfo[i]-- local's id
local obj=localinfo[id]
local name=obj.name-- name of local
obj.act=xref-- set activation location
ilocalinfo[i]=nil
local locallist=ilocalrefs[i]-- ref to lookup table to update
ilocalrefs[i]=nil
local existing=locallist[name]-- if existing, remove old first!
if existing then-- do not overlap, set special
obj=localinfo[existing]-- form of rem, as -id
obj.rem=-id
end
locallist[name]=id-- activate, now visible to Lua
end
end

----------------------------------------------------------------------
-- remove (deactivate) variables in current scope (before scope exits)
-- * zap entire locallist tables since we are not allocating registers
-- * used in leaveblock(), close_func()
----------------------------------------------------------------------

local function removevars()
local bl=fs.bl
local locallist
-- locate locallist in current block object or function root object
if bl then
locallist=bl.locallist
else
locallist=fs.locallist
end
-- enumerate the local list at current scope and deactivate 'em
for name,id in base.pairs(locallist)do
local obj=localinfo[id]
obj.rem=xref-- set deactivation location
end
end

----------------------------------------------------------------------
-- creates a new local variable given a name
-- * skips internal locals (those starting with '('), so internal
--   locals never need a corresponding adjustlocalvars() call
-- * special is true for "self" which must not be optimized
-- * used in fornum(), forlist(), parlist(), body()
----------------------------------------------------------------------

local function new_localvarliteral(name,special)
if string.sub(name,1,1)=="("then-- can skip internal locals
return
end
new_localvar(name,special)
end

----------------------------------------------------------------------
-- search the local variable namespace of the given fs for a match
-- * returns localinfo index
-- * used only in singlevaraux()
----------------------------------------------------------------------

local function searchvar(fs,n)
local bl=fs.bl
local locallist
if bl then
locallist=bl.locallist
while locallist do
if locallist[n]then return locallist[n]end-- found
bl=bl.prev
locallist=bl and bl.locallist
end
end
locallist=fs.locallist
return locallist[n]or-1-- found or not found (-1)
end

----------------------------------------------------------------------
-- handle locals, globals and upvalues and related processing
-- * search mechanism is recursive, calls itself to search parents
-- * used only in singlevar()
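-- * e.g. given "local a; local function f() return a end", 'a' inside
--   f is not found at f's level but is found one level up, so it is
--   tagged "VUPVAL"; a name found at no level becomes "VGLOBAL"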
----------------------------------------------------------------------

local function singlevaraux(fs,n,var)
if fs==nil then-- no more levels?
var.k="VGLOBAL"-- default is global variable
return"VGLOBAL"
else
local v=searchvar(fs,n)-- look up at current level
if v>=0 then
var.k="VLOCAL"
var.id=v
--  codegen may need to deal with upvalue here
return"VLOCAL"
else-- not found at current level; try upper one
if singlevaraux(fs.prev,n,var)=="VGLOBAL"then
return"VGLOBAL"
end
-- else was LOCAL or UPVAL, handle here
var.k="VUPVAL"-- upvalue in this level
return"VUPVAL"
end--if v
end--if fs
end

----------------------------------------------------------------------
-- consume a name token, creates a variable (global|local|upvalue)
-- * used in prefixexp(), funcname()
----------------------------------------------------------------------

local function singlevar(v)
local name=str_checkname()
singlevaraux(fs,name,v)
------------------------------------------------------------------
-- variable tracking
------------------------------------------------------------------
if v.k=="VGLOBAL"then
-- if global being accessed, keep track of it by creating an object
local id=globallookup[name]
if not id then
id=#globalinfo+1
globalinfo[id]={-- new global variable object
name=name,-- global variable name
xref={nameref},-- xref, first value is declaration
}
globallookup[name]=id-- remember it
else
local obj=globalinfo[id].xref
obj[#obj+1]=nameref-- add xref
end
else
-- local/upvalue is being accessed, keep track of it
local id=v.id
local obj=localinfo[id].xref
obj[#obj+1]=nameref-- add xref
end
end

--[[--------------------------------------------------------------------
-- state management functions with open/close pairs
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- enters a code unit, initializes elements
----------------------------------------------------------------------

local function enterblock(isbreakable)
local bl={}-- per-block state
bl.isbreakable=isbreakable
bl.prev=fs.bl
bl.locallist={}
fs.bl=bl
end

----------------------------------------------------------------------
-- leaves a code unit, close any upvalues
----------------------------------------------------------------------

local function leaveblock()
local bl=fs.bl
removevars()
fs.bl=bl.prev
end

----------------------------------------------------------------------
-- opening of a function
-- * top_fs is only for anchoring the top fs, so that parser() can
--   return it to the caller function along with useful output
-- * used in parser() and body()
----------------------------------------------------------------------

local function open_func()
local new_fs-- per-function state
if not fs then-- top_fs is created early
new_fs=top_fs
else
new_fs={}
end
new_fs.prev=fs-- linked list of function states
new_fs.bl=nil
new_fs.locallist={}
fs=new_fs
end

----------------------------------------------------------------------
-- closing of a function
-- * used in parser() and body()
----------------------------------------------------------------------

local function close_func()
removevars()
fs=fs.prev
end

--[[--------------------------------------------------------------------
-- other parsing functions
-- * for table constructor, parameter list, argument list
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- parse a function name suffix, for function call specifications
-- * used in primaryexp(), funcname()
----------------------------------------------------------------------

local function field(v)
-- field -> ['.' | ':'] NAME
local key={}
nextt()-- skip the dot or colon
checkname(key)
v.k="VINDEXED"
end

----------------------------------------------------------------------
-- parse a table indexing suffix, for constructors, expressions
-- * used in recfield(), primaryexp()
----------------------------------------------------------------------

local function yindex(v)
-- index -> '[' expr ']'
nextt()-- skip the '['
expr(v)
checknext("]")
end

----------------------------------------------------------------------
-- parse a table record (hash) field
-- * used in constructor()
----------------------------------------------------------------------

local function recfield(cc)
-- recfield -> (NAME | '['exp1']') = exp1
local key,val={},{}
if tok=="<name>"then
checkname(key)
else-- tok == '['
yindex(key)
end
checknext("=")
expr(val)
end

----------------------------------------------------------------------
-- emit a set list instruction if enough elements (LFIELDS_PER_FLUSH)
-- * note: retained in this skeleton because it modifies cc.v.k
-- * used in constructor()
----------------------------------------------------------------------

local function closelistfield(cc)
if cc.v.k=="VVOID"then return end-- there is no list item
cc.v.k="VVOID"
end

----------------------------------------------------------------------
-- parse a table list (array) field
-- * used in constructor()
----------------------------------------------------------------------

local function listfield(cc)
expr(cc.v)
end

----------------------------------------------------------------------
-- parse a table constructor
-- * used in funcargs(), simpleexp()
----------------------------------------------------------------------

local function constructor(t)
-- constructor -> '{' [ field { fieldsep field } [ fieldsep ] ] '}'
-- field -> recfield | listfield
-- fieldsep -> ',' | ';'
local line=ln
local cc={}
cc.v={}
cc.t=t
t.k="VRELOCABLE"
cc.v.k="VVOID"
checknext("{")
repeat
if tok=="}"then break end
-- closelistfield(cc) here
local c=tok
if c=="<name>"then-- may be listfields or recfields
if lookahead()~="="then-- look ahead: expression?
listfield(cc)
else
recfield(cc)
end
elseif c=="["then-- constructor_item -> recfield
recfield(cc)
else-- constructor_part -> listfield
listfield(cc)
end
until not testnext(",")and not testnext(";")
check_match("}","{",line)
-- lastlistfield(cc) here
end

----------------------------------------------------------------------
-- parse the arguments (parameters) of a function declaration
-- * used in body()
----------------------------------------------------------------------

local function parlist()
-- parlist -> [ param { ',' param } ]
local nparams=0
if tok~=")"then-- is 'parlist' not empty?
repeat
local c=tok
if c=="<name>"then-- param -> NAME
new_localvar(str_checkname())
nparams=nparams+1
elseif c=="..."then
nextt()
fs.is_vararg=true
else
syntaxerror("<name> or '...' expected")
end
until fs.is_vararg or not testnext(",")
end--if
adjustlocalvars(nparams)
end

----------------------------------------------------------------------
-- parse the parameters of a function call
-- * contrast with parlist(), used in function declarations
-- * used in primaryexp()
----------------------------------------------------------------------

local function funcargs(f)
local args={}
local line=ln
local c=tok
if c=="("then-- funcargs -> '(' [ explist1 ] ')'
if line~=lastln then
syntaxerror("ambiguous syntax (function call x new statement)")
end
nextt()
if tok==")"then-- arg list is empty?
args.k="VVOID"
else
explist1(args)
end
check_match(")","(",line)
elseif c=="{"then-- funcargs -> constructor
constructor(args)
elseif c=="<string>"then-- funcargs -> STRING
codestring(args,seminfo)
nextt()-- must use 'seminfo' before 'next'
else
syntaxerror("function arguments expected")
return
end--if c
f.k="VCALL"
end

--[[--------------------------------------------------------------------
-- mostly expression functions
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- parses an expression in parentheses or a single variable
-- * used in primaryexp()
----------------------------------------------------------------------

local function prefixexp(v)
-- prefixexp -> NAME | '(' expr ')'
local c=tok
if c=="("then
local line=ln
nextt()
expr(v)
check_match(")","(",line)
elseif c=="<name>"then
singlevar(v)
else
syntaxerror("unexpected symbol")
end--if c
end

----------------------------------------------------------------------
-- parses a prefixexp (an expression in parentheses or a single
-- variable) or a function call specification
-- * used in simpleexp(), assignment(), expr_stat()
----------------------------------------------------------------------

local function primaryexp(v)
-- primaryexp ->
--    prefixexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs }
prefixexp(v)
while true do
local c=tok
if c=="."then-- field
field(v)
elseif c=="["then-- '[' exp1 ']'
local key={}
yindex(key)
elseif c==":"then-- ':' NAME funcargs
local key={}
nextt()
checkname(key)
funcargs(v)
elseif c=="("or c=="<string>"or c=="{"then-- funcargs
funcargs(v)
else
return
end--if c
end--while
end

----------------------------------------------------------------------
-- parses general expression types, constants handled here
-- * used in subexpr()
----------------------------------------------------------------------

local function simpleexp(v)
-- simpleexp -> NUMBER | STRING | NIL | TRUE | FALSE | ... |
--              constructor | FUNCTION body | primaryexp
local c=tok
if c=="<number>"then
v.k="VKNUM"
elseif c=="<string>"then
codestring(v,seminfo)
elseif c=="nil"then
v.k="VNIL"
elseif c=="true"then
v.k="VTRUE"
elseif c=="false"then
v.k="VFALSE"
elseif c=="..."then-- vararg
check_condition(fs.is_vararg==true,
"cannot use '...' outside a vararg function");
v.k="VVARARG"
elseif c=="{"then-- constructor
constructor(v)
return
elseif c=="function"then
nextt()
body(v,false,ln)
return
else
primaryexp(v)
return
end--if c
nextt()
end

------------------------------------------------------------------------
-- Parse subexpressions. Includes handling of unary operators and binary
-- operators. A subexpr is given the rhs priority level of the operator
-- immediately left of it, if any (limit is -1 if none,) and if a binop
-- is found, limit is compared with the lhs priority level of the binop
-- in order to determine which executes first.
-- * recursively called
-- * used in expr()
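-- * e.g. in "1+2*3", '+' has priorities 6/6 and '*' has 7/7, so the
--   inner subexpr(v2,6) call also consumes "*3" ('*' binds tighter);
--   '^' (10/9) and '..' (5/4) have a lower right priority, which is
--   what makes them right-associative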
------------------------------------------------------------------------

local function subexpr(v,limit)
-- subexpr -> (simpleexp | unop subexpr) { binop subexpr }
--   * where 'binop' is any binary operator with a priority
--     higher than 'limit'
local op=tok
local uop=unopr[op]
if uop then
nextt()
subexpr(v,UNARY_PRIORITY)
else
simpleexp(v)
end
-- expand while operators have priorities higher than 'limit'
op=tok
local binop=binopr_left[op]
while binop and binop>limit do
local v2={}
nextt()
-- read sub-expression with higher priority
local nextop=subexpr(v2,binopr_right[op])
op=nextop
binop=binopr_left[op]
end
return op-- return first untreated operator
end

----------------------------------------------------------------------
-- Expression parsing starts here. Function subexpr is entered with the
-- left operator (which is non-existent) priority of 0, which is lower
-- than all actual operators. Expr information is returned in parm v.
-- * used in cond(), explist1(), index(), recfield(), listfield(),
--   prefixexp(), while_stat(), exp1()
----------------------------------------------------------------------

-- this is a forward-referenced local
function expr(v)
-- expr -> subexpr
subexpr(v,0)
end

--[[--------------------------------------------------------------------
-- third level parsing functions
----------------------------------------------------------------------]]

------------------------------------------------------------------------
-- parse a variable assignment sequence
-- * recursively called
-- * used in expr_stat()
------------------------------------------------------------------------

local function assignment(v)
local e={}
local c=v.v.k
check_condition(c=="VLOCAL"or c=="VUPVAL"or c=="VGLOBAL"
or c=="VINDEXED","syntax error")
if testnext(",")then-- assignment -> ',' primaryexp assignment
local nv={}-- expdesc
nv.v={}
primaryexp(nv.v)
-- lparser.c deals with some register usage conflict here
assignment(nv)
else-- assignment -> '=' explist1
checknext("=")
explist1(e)
return-- avoid default
end
e.k="VNONRELOC"
end

----------------------------------------------------------------------
-- parse a for loop body for both versions of the for loop
-- * used in fornum(), forlist()
----------------------------------------------------------------------

local function forbody(nvars,isnum)
-- forbody -> DO block
checknext("do")
enterblock(false)-- scope for declared variables
adjustlocalvars(nvars)
block()
leaveblock()-- end of scope for declared variables
end

----------------------------------------------------------------------
-- parse a numerical for loop, calls forbody()
-- * used in for_stat()
----------------------------------------------------------------------

local function fornum(varname)
-- fornum -> NAME = exp1, exp1 [, exp1] DO body
local line=line
new_localvarliteral("(for index)")
new_localvarliteral("(for limit)")
new_localvarliteral("(for step)")
new_localvar(varname)
checknext("=")
exp1()-- initial value
checknext(",")
exp1()-- limit
if testnext(",")then
exp1()-- optional step
else
-- default step = 1
end
forbody(1,true)
end

----------------------------------------------------------------------
-- parse a generic for loop, calls forbody()
-- * used in for_stat()
----------------------------------------------------------------------

local function forlist(indexname)
-- forlist -> NAME {, NAME} IN explist1 DO body
local e={}
-- create control variables
new_localvarliteral("(for generator)")
new_localvarliteral("(for state)")
new_localvarliteral("(for control)")
-- create declared variables
new_localvar(indexname)
local nvars=1
while testnext(",")do
new_localvar(str_checkname())
nvars=nvars+1
end
checknext("in")
local line=line
explist1(e)
forbody(nvars,false)
end

----------------------------------------------------------------------
-- parse a function name specification
-- * used in func_stat()
----------------------------------------------------------------------

local function funcname(v)
-- funcname -> NAME {field} [':' NAME]
local needself=false
singlevar(v)
while tok=="."do
field(v)
end
if tok==":"then
needself=true
field(v)
end
return needself
end

----------------------------------------------------------------------
-- parse the single expressions needed in numerical for loops
-- * used in fornum()
----------------------------------------------------------------------

-- this is a forward-referenced local
function exp1()
-- exp1 -> expr
local e={}
expr(e)
end

----------------------------------------------------------------------
-- parse condition in a repeat statement or an if control structure
-- * used in repeat_stat(), test_then_block()
----------------------------------------------------------------------

local function cond()
-- cond -> expr
local v={}
expr(v)-- read condition
end

----------------------------------------------------------------------
-- parse part of an if control structure, including the condition
-- * used in if_stat()
----------------------------------------------------------------------

local function test_then_block()
-- test_then_block -> [IF | ELSEIF] cond THEN block
nextt()-- skip IF or ELSEIF
cond()
checknext("then")
block()-- 'then' part
end

----------------------------------------------------------------------
-- parse a local function statement
-- * used in local_stat()
----------------------------------------------------------------------

local function localfunc()
-- localfunc -> NAME body
local v,b={}
new_localvar(str_checkname())
v.k="VLOCAL"
adjustlocalvars(1)
body(b,false,ln)
end

----------------------------------------------------------------------
-- parse a local variable declaration statement
-- * used in local_stat()
----------------------------------------------------------------------

local function localstat()
-- localstat -> NAME {',' NAME} ['=' explist1]
local nvars=0
local e={}
repeat
new_localvar(str_checkname())
nvars=nvars+1
until not testnext(",")
if testnext("=")then
explist1(e)
else
e.k="VVOID"
end
adjustlocalvars(nvars)
end

----------------------------------------------------------------------
-- parse a list of comma-separated expressions
-- * used in return_stat(), localstat(), funcargs(), assignment(),
--   forlist()
----------------------------------------------------------------------

-- this is a forward-referenced local
function explist1(e)
-- explist1 -> expr { ',' expr }
expr(e)
while testnext(",")do
expr(e)
end
end

----------------------------------------------------------------------
-- parse function declaration body
-- * used in simpleexp(), localfunc(), func_stat()
----------------------------------------------------------------------

-- this is a forward-referenced local
function body(e,needself,line)
-- body ->  '(' parlist ')' chunk END
open_func()
checknext("(")
if needself then
new_localvarliteral("self",true)
adjustlocalvars(1)
end
parlist()
checknext(")")
chunk()
check_match("end","function",line)
close_func()
end

----------------------------------------------------------------------
-- parse a code block or unit
-- * used in do_stat(), while_stat(), forbody(), test_then_block(),
--   if_stat()
----------------------------------------------------------------------

-- this is a forward-referenced local
function block()
-- block -> chunk
enterblock(false)
chunk()
leaveblock()
end

--[[--------------------------------------------------------------------
-- second level parsing functions, all with '_stat' suffix
-- * since they are called via a table lookup, they cannot be local
--   functions (a lookup table of local functions might be smaller...)
-- * stat() -> *_stat()
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- initial parsing for a for loop, calls fornum() or forlist()
-- * removed 'line' parameter (used to set debug information only)
-- * used in stat()
----------------------------------------------------------------------

local function for_stat()
-- stat -> for_stat -> FOR (fornum | forlist) END
local line=line
enterblock(true)-- scope for loop and control variables
nextt()-- skip 'for'
local varname=str_checkname()-- first variable name
local c=tok
if c=="="then
fornum(varname)
elseif c==","or c=="in"then
forlist(varname)
else
syntaxerror("'=' or 'in' expected")
end
check_match("end","for",line)
leaveblock()-- loop scope (`break' jumps to this point)
end

----------------------------------------------------------------------
-- parse a while-do control structure, body processed by block()
-- * used in stat()
----------------------------------------------------------------------

local function while_stat()
-- stat -> while_stat -> WHILE cond DO block END
local line=line
nextt()-- skip WHILE
cond()-- parse condition
enterblock(true)
checknext("do")
block()
check_match("end","while",line)
leaveblock()
end

----------------------------------------------------------------------
-- parse a repeat-until control structure, body parsed by chunk()
-- * originally, repeatstat() calls breakstat() too if there is an
--   upvalue in the scope block; nothing extra is lexed there, it is
--   just the common code in breakstat() for closing of upvalues
-- * used in stat()
----------------------------------------------------------------------

local function repeat_stat()
-- stat -> repeat_stat -> REPEAT block UNTIL cond
local line=line
enterblock(true)-- loop block
enterblock(false)-- scope block
nextt()-- skip REPEAT
chunk()
check_match("until","repeat",line)
cond()
-- close upvalues at scope level below
leaveblock()-- finish scope
leaveblock()-- finish loop
end

----------------------------------------------------------------------
-- parse an if control structure
-- * used in stat()
----------------------------------------------------------------------

local function if_stat()
-- stat -> if_stat -> IF cond THEN block
--                    {ELSEIF cond THEN block} [ELSE block] END
local line=line
local v={}
test_then_block()-- IF cond THEN block
while tok=="elseif"do
test_then_block()-- ELSEIF cond THEN block
end
if tok=="else"then
nextt()-- skip ELSE
block()-- 'else' part
end
check_match("end","if",line)
end

----------------------------------------------------------------------
-- parse a return statement
-- * used in stat()
----------------------------------------------------------------------

local function return_stat()
-- stat -> return_stat -> RETURN explist
local e={}
nextt()-- skip RETURN
local c=tok
if block_follow[c]or c==";"then
-- return no values
else
explist1(e)-- optional return values
end
end

----------------------------------------------------------------------
-- parse a break statement
-- * used in stat()
----------------------------------------------------------------------

local function break_stat()
-- stat -> break_stat -> BREAK
local bl=fs.bl
nextt()-- skip BREAK
while bl and not bl.isbreakable do-- find a breakable block
bl=bl.prev
end
if not bl then
syntaxerror("no loop to break")
end
end

----------------------------------------------------------------------
-- parse a function call with no returns or an assignment statement
-- * the struct with .prev is used for name searching in lparser.c,
--   so it is retained for now; present in assignment() also
-- * used in stat()
----------------------------------------------------------------------

local function expr_stat()
local id=tpos-1
-- stat -> expr_stat -> func | assignment
local v={}
v.v={}
primaryexp(v.v)
if v.v.k=="VCALL"then-- stat -> func
-- call statement uses no results
statinfo[id]="call"
else-- stat -> assignment
v.prev=nil
assignment(v)
statinfo[id]="assign"
end
end

----------------------------------------------------------------------
-- parse a function statement
-- * used in stat()
----------------------------------------------------------------------

local function function_stat()
-- stat -> function_stat -> FUNCTION funcname body
local line=line
local v,b={},{}
nextt()-- skip FUNCTION
local needself=funcname(v)
body(b,needself,line)
end

----------------------------------------------------------------------
-- parse a simple block enclosed by a DO..END pair
-- * used in stat()
----------------------------------------------------------------------

local function do_stat()
-- stat -> do_stat -> DO block END
local line=line
nextt()-- skip DO
block()
check_match("end","do",line)
end

----------------------------------------------------------------------
-- parse a statement starting with LOCAL
-- * used in stat()
----------------------------------------------------------------------

local function local_stat()
-- stat -> local_stat -> LOCAL FUNCTION localfunc
--                    -> LOCAL localstat
nextt()-- skip LOCAL
if testnext("function")then-- local function?
localfunc()
else
localstat()
end
end

--[[--------------------------------------------------------------------
-- main functions, top level parsing functions
-- * accessible functions are: init(lexer), parser()
-- * [entry] -> parser() -> chunk() -> stat()
----------------------------------------------------------------------]]

----------------------------------------------------------------------
-- initial parsing for statements, calls '_stat' suffixed functions
-- * used in chunk()
----------------------------------------------------------------------

local stat_call={-- lookup for calls in stat()
["if"]=if_stat,
["while"]=while_stat,
["do"]=do_stat,
["for"]=for_stat,
["repeat"]=repeat_stat,
["function"]=function_stat,
["local"]=local_stat,
["return"]=return_stat,
["break"]=break_stat,
}

local function stat()
-- stat -> if_stat while_stat do_stat for_stat repeat_stat
--         function_stat local_stat return_stat break_stat
--         expr_stat
line=ln-- may be needed for error messages
local c=tok
local fn=stat_call[c]
-- handles: if while do for repeat function local return break
if fn then
statinfo[tpos-1]=c
fn()
-- return or break must be last statement
if c=="return"or c=="break"then return true end
else
expr_stat()
end
return false
end

----------------------------------------------------------------------
-- parse a chunk, which consists of a bunch of statements
-- * used in parser(), body(), block(), repeat_stat()
----------------------------------------------------------------------

-- this is a forward-referenced local
function chunk()
-- chunk -> { stat [';'] }
local islast=false
while not islast and not block_follow[tok]do
islast=stat()
testnext(";")
end
end

----------------------------------------------------------------------
-- performs parsing, returns parsed data structure
----------------------------------------------------------------------

function parser()
open_func()
fs.is_vararg=true-- main func. is always vararg
nextt()-- read first token
chunk()
check("<eof>")
close_func()
return{-- return everything
globalinfo=globalinfo,
localinfo=localinfo,
statinfo=statinfo,
toklist=toklist,
seminfolist=seminfolist,
toklnlist=toklnlist,
xreflist=xreflist,
}
end

----------------------------------------------------------------------
-- initialization function
----------------------------------------------------------------------

function init(tokorig,seminfoorig,toklnorig)
tpos=1-- token position
top_fs={}-- reset top level function state
------------------------------------------------------------------
-- set up grammar-only token tables; impedance-matching...
-- note that constants returned by the lexer are source-level, so
-- for now, fake(!) constant tokens (TK_NUMBER|TK_STRING|TK_LSTRING)
------------------------------------------------------------------
local j=1
toklist,seminfolist,toklnlist,xreflist={},{},{},{}
for i=1,#tokorig do
local tok=tokorig[i]
local yep=true
if tok=="TK_KEYWORD"or tok=="TK_OP"then
tok=seminfoorig[i]
elseif tok=="TK_NAME"then
tok="<name>"
seminfolist[j]=seminfoorig[i]
elseif tok=="TK_NUMBER"then
tok="<number>"
seminfolist[j]=0-- fake!
elseif tok=="TK_STRING"or tok=="TK_LSTRING"then
tok="<string>"
seminfolist[j]=""-- fake!
elseif tok=="TK_EOS"then
tok="<eof>"
else
-- non-grammar tokens; ignore them
yep=false
end
if yep then-- set rest of the information
toklist[j]=tok
toklnlist[j]=toklnorig[i]
xreflist[j]=i
j=j+1
end
end--for
------------------------------------------------------------------
-- initialize data structures for variable tracking
------------------------------------------------------------------
globalinfo,globallookup,localinfo={},{},{}
ilocalinfo,ilocalrefs={},{}
statinfo={}-- experimental
end
--end of inserted module
end

-- preload function for module optlex
preload.optlex=
function()
--start of inserted module
module"optlex"

local string=base.require"string"
local match=string.match
local sub=string.sub
local find=string.find
local rep=string.rep
local print

------------------------------------------------------------------------
-- variables and data structures
------------------------------------------------------------------------

-- error function, can be overridden by setting own function into module
error=base.error

warn={}-- table for warning flags

local stoks,sinfos,stoklns-- source lists

local is_realtoken={-- significant (grammar) tokens
TK_KEYWORD=true,
TK_NAME=true,
TK_NUMBER=true,
TK_STRING=true,
TK_LSTRING=true,
TK_OP=true,
TK_EOS=true,
}
local is_faketoken={-- whitespace (non-grammar) tokens
TK_COMMENT=true,
TK_LCOMMENT=true,
TK_EOL=true,
TK_SPACE=true,
}

local opt_details-- for extra information

------------------------------------------------------------------------
-- true if current token is at the start of a line
-- * skips over deleted tokens via recursion
------------------------------------------------------------------------

local function atlinestart(i)
local tok=stoks[i-1]
if i<=1 or tok=="TK_EOL"then
return true
elseif tok==""then
return atlinestart(i-1)
end
return false
end

------------------------------------------------------------------------
-- true if current token is at the end of a line
-- * skips over deleted tokens via recursion
------------------------------------------------------------------------

local function atlineend(i)
local tok=stoks[i+1]
if i>=#stoks or tok=="TK_EOL"or tok=="TK_EOS"then
return true
elseif tok==""then
return atlineend(i+1)
end
return false
end

------------------------------------------------------------------------
-- counts comment EOLs inside a long comment
-- * in order to keep line numbering, EOLs need to be reinserted
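-- * e.g. commenteols("--[[\n\n]]") returns 2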
------------------------------------------------------------------------

local function commenteols(lcomment)
local sep=#match(lcomment,"^%-%-%[=*%[")
local z=sub(lcomment,sep+1,-(sep-1))-- remove delims
local i,c=1,0
while true do
local p,q,r,s=find(z,"([\r\n])([\r\n]?)",i)
if not p then break end-- if no matches, done
i=p+1
c=c+1
if#s>0 and r~=s then-- skip CRLF or LFCR
i=i+1
end
end
return c
end

------------------------------------------------------------------------
-- compares two tokens (i, j) and returns the whitespace required
-- * see documentation for a reference table of interactions
-- * only two grammar/real tokens are being considered
-- * if "", no separation is needed
-- * if " ", then at least one whitespace (or EOL) is required
-- * NOTE: this doesn't work at the start or the end or for EOS!
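-- * e.g. a name-keyword pair needs " " ("local x"); ".." before "."
--   needs " " or they fuse into "..."; "<" before "=" needs " " or
--   they fuse into "<="; a string next to anything needs none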
------------------------------------------------------------------------

local function checkpair(i,j)
local match=match
local t1,t2=stoks[i],stoks[j]
--------------------------------------------------------------------
if t1=="TK_STRING"or t1=="TK_LSTRING"or
t2=="TK_STRING"or t2=="TK_LSTRING"then
return""
--------------------------------------------------------------------
elseif t1=="TK_OP"or t2=="TK_OP"then
if(t1=="TK_OP"and(t2=="TK_KEYWORD"or t2=="TK_NAME"))or
(t2=="TK_OP"and(t1=="TK_KEYWORD"or t1=="TK_NAME"))then
return""
end
if t1=="TK_OP"and t2=="TK_OP"then
-- for TK_OP/TK_OP pairs, see notes in technotes.txt
local op,op2=sinfos[i],sinfos[j]
if(match(op,"^%.%.?$")and match(op2,"^%."))or
(match(op,"^[~=<>]$")and op2=="=")or
(op=="["and(op2=="["or op2=="="))then
return" "
end
return""
end
-- "TK_OP" + "TK_NUMBER" case
local op=sinfos[i]
if t2=="TK_OP"then op=sinfos[j]end
if match(op,"^%.%.?%.?$")then
return" "
end
return""
--------------------------------------------------------------------
else-- "TK_KEYWORD" | "TK_NAME" | "TK_NUMBER" then
return" "
--------------------------------------------------------------------
end
end

------------------------------------------------------------------------
-- repack tokens, removing deletions caused by optimization process
------------------------------------------------------------------------

local function repack_tokens()
local dtoks,dinfos,dtoklns={},{},{}
local j=1
for i=1,#stoks do
local tok=stoks[i]
if tok~=""then
dtoks[j],dinfos[j],dtoklns[j]=tok,sinfos[i],stoklns[i]
j=j+1
end
end
stoks,sinfos,stoklns=dtoks,dinfos,dtoklns
end

------------------------------------------------------------------------
-- number optimization
-- * optimization using string formatting functions is one way of doing
--   this, but here, we consider all cases and handle them separately
--   (possibly an idiotic approach...)
-- * scientific notation being generated is not in canonical form, this
--   may or may not be a bad thing
-- * note: intermediate portions need to fit into a normal number range
-- * optimizations can be divided based on number patterns:
-- * hexadecimal:
--   (1) no need to remove leading zeros, just skip to (2)
--   (2) convert to integer if size equal or smaller
--       * change if equal size -> lose the 'x' to reduce entropy
--   (3) number is then processed as an integer
--   (4) note: does not make 0[xX] consistent
-- * integer:
--   (1) note: includes anything with trailing ".", ".0", ...
--   (2) remove useless fractional part, if present, e.g. 123.000
--   (3) remove leading zeros, e.g. 000123
--   (4) switch to scientific if shorter, e.g. 123000 -> 123e3
-- * with fraction:
--   (1) split into digits dot digits
--   (2) if no integer portion, take as zero (can omit later)
--   (3) handle degenerate .000 case, after which the fractional part
--       must be non-zero (if zero, it's matched as an integer)
--   (4) remove trailing zeros for fractional portion
--   (5) p.q where p > 0 and q > 0 cannot be shortened any more
--   (6) otherwise p == 0 and the form is .q, e.g. .000123
--   (7) if scientific shorter, convert, e.g. .000123 -> 123e-6
-- * scientific:
--   (1) split into (digits dot digits) [eE] ([+-] digits)
--   (2) if significand has ".", shift it out so it becomes an integer
--   (3) if significand is zero, just use zero
--   (4) remove leading zeros for significand
--   (5) shift out trailing zeros for significand
--   (6) examine exponent and determine which format is best:
--       integer, with fraction, scientific
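-- * end-to-end examples, traced through the cases below:
--   0x10 -> 16, 123.000 -> 123, 123000 -> 123e3, .000123 -> 123e-6,
--   12e1 -> 120, 1.23e5 -> 123e3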
------------------------------------------------------------------------

local function do_number(i)
local before=sinfos[i]-- 'before'
local z=before-- working representation
local y-- 'after', if better
--------------------------------------------------------------------
if match(z,"^0[xX]")then-- hexadecimal number
local v=base.tostring(base.tonumber(z))
if#v<=#z then
z=v-- change to integer, AND continue
else
return-- no change; stick to hex
end
end
--------------------------------------------------------------------
if match(z,"^%d+%.?0*$")then-- integer or has useless frac
z=match(z,"^(%d+)%.?0*$")-- int portion only
if z+0>0 then
z=match(z,"^0*([1-9]%d*)$")-- remove leading zeros
local v=#match(z,"0*$")
local nv=base.tostring(v)
if v>#nv+1 then-- scientific is shorter
z=sub(z,1,#z-v).."e"..nv
end
y=z
else
y="0"-- basic zero
end
--------------------------------------------------------------------
elseif not match(z,"[eE]")then-- number with fraction part
local p,q=match(z,"^(%d*)%.(%d+)$")-- split
if p==""then p=0 end-- int part zero
if q+0==0 and p==0 then
y="0"-- degenerate .000 case
else
-- now, q > 0 holds and p is a number
local v=#match(q,"0*$")-- remove trailing zeros
if v>0 then
q=sub(q,1,#q-v)
end
-- if p > 0, nothing else we can do to simplify p.q case
if p+0>0 then
y=p.."."..q
else
y="."..q-- tentative, e.g. .000123
local v=#match(q,"^0*")-- # leading spaces
local w=#q-v-- # significant digits
local nv=base.tostring(#q)
-- e.g. compare 123e-6 versus .000123
if w+2+#nv<1+#q then
y=sub(q,-w).."e-"..nv
end
end
end
--------------------------------------------------------------------
else-- scientific number
local sig,ex=match(z,"^([^eE]+)[eE]([%+%-]?%d+)$")
ex=base.tonumber(ex)
-- if got ".", shift out fractional portion of significand
local p,q=match(sig,"^(%d*)%.(%d*)$")
if p then
ex=ex-#q
sig=p..q
end
if sig+0==0 then
y="0"-- basic zero
else
local v=#match(sig,"^0*")-- remove leading zeros
sig=sub(sig,v+1)
v=#match(sig,"0*$")-- shift out trailing zeros
if v>0 then
sig=sub(sig,1,#sig-v)
ex=ex+v
end
-- examine exponent and determine which format is best
local nex=base.tostring(ex)
if ex==0 then-- it's just an integer
y=sig
elseif ex>0 and(ex<=1+#nex)then-- a number
y=sig..rep("0",ex)
elseif ex<0 and(ex>=-#sig)then-- fraction, e.g. .123
v=#sig+ex
y=sub(sig,1,v).."."..sub(sig,v+1)
elseif ex<0 and(#nex>=-ex-#sig)then
-- e.g. compare 1234e-5 versus .01234
-- gives: #sig + 1 + #nex >= 1 + (-ex - #sig) + #sig
--     -> #nex >= -ex - #sig
v=-ex-#sig
y="."..rep("0",v)..sig
else-- non-canonical scientific representation
y=sig.."e"..ex
end
end--if sig
end
--------------------------------------------------------------------
if y and y~=sinfos[i]then
if opt_details then
print("<number> (line "..stoklns[i]..") "..sinfos[i].." -> "..y)
opt_details=opt_details+1
end
sinfos[i]=y
end
end

------------------------------------------------------------------------
-- string optimization
-- * note: works on well-formed strings only!
-- * optimizations on characters can be summarized as follows:
--   \a\b\f\n\r\t\v -- no change
--   \\ -- no change
--   \"\' -- depends on delim, other can remove \
--   \[\] -- remove \
--   \<char> -- general escape, remove \
--   \<eol> -- normalize the EOL only
--   \ddd -- if \a\b\f\n\r\t\v, change to latter
--           if other < ascii 32, keep ddd but zap leading zeros
--                                but cannot have following digits
--           if >= ascii 32, translate it into the literal, then also
--                           do escapes for \\,\",\' cases
--   <other> -- no change
-- * switch delimiters if string becomes shorter
------------------------------------------------------------------------
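-- illustrative conversions (comments only, assuming well-formed
-- input as noted above):
--   "\65BC"  -> "ABC"    \ddd >= ascii 32 becomes the literal
--   "\009"   -> "\t"     \ddd mapping to a special escape is renamed
--   "\027"   -> "\27"    leading zeros zapped, no digit follows
--   "a\'b"   -> "a'b"    \' loses its \ inside a "..." string
--   'don\'t' -> "don't"  delimiter switch saves escape bytes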

local function do_string(I)
local info=sinfos[I]
local delim=sub(info,1,1)-- delimiter used
local ndelim=(delim=="'")and'"'or"'"-- opposite " <-> '
local z=sub(info,2,-2)-- actual string
local i=1
local c_delim,c_ndelim=0,0-- "/' counts
--------------------------------------------------------------------
while i<=#z do
local c=sub(z,i,i)
----------------------------------------------------------------
if c=="\\"then-- escaped stuff
local j=i+1
local d=sub(z,j,j)
local p=find("abfnrtv\\\n\r\"\'0123456789",d,1,true)
------------------------------------------------------------
if not p then-- \<char> -- remove \
z=sub(z,1,i-1)..sub(z,j)
i=i+1
------------------------------------------------------------
elseif p<=8 then-- \a\b\f\n\r\t\v\\
i=i+2-- no change
------------------------------------------------------------
elseif p<=10 then-- \<eol> -- normalize EOL
local eol=sub(z,j,j+1)
if eol=="\r\n"or eol=="\n\r"then
z=sub(z,1,i).."\n"..sub(z,j+2)
elseif p==10 then-- \r case
z=sub(z,1,i).."\n"..sub(z,j+1)
end
i=i+2
------------------------------------------------------------
elseif p<=12 then-- \"\' -- remove \ for ndelim
if d==delim then
c_delim=c_delim+1
i=i+2
else
c_ndelim=c_ndelim+1
z=sub(z,1,i-1)..sub(z,j)
i=i+1
end
------------------------------------------------------------
else-- \ddd -- various steps
local s=match(z,"^(%d%d?%d?)",j)
j=i+1+#s-- skip to location
local cv=s+0
local cc=string.char(cv)
local p=find("\a\b\f\n\r\t\v",cc,1,true)
if p then-- special escapes
s="\\"..sub("abfnrtv",p,p)
elseif cv<32 then-- normalized \ddd
if match(sub(z,j,j),"%d")then
-- if a digit follows, \ddd cannot be shortened
s="\\"..s
else
s="\\"..cv
end
elseif cc==delim then-- \<delim>
s="\\"..cc
c_delim=c_delim+1
elseif cc=="\\"then-- \\
s="\\\\"
else-- literal character
s=cc
if cc==ndelim then
c_ndelim=c_ndelim+1
end
end
z=sub(z,1,i-1)..s..sub(z,j)
i=i+#s
------------------------------------------------------------
end--if p
----------------------------------------------------------------
else-- c ~= "\\"                    -- <other> -- no change
i=i+1
if c==ndelim then-- count ndelim, for switching delimiters
c_ndelim=c_ndelim+1
end
----------------------------------------------------------------
end--if c
end--while
--------------------------------------------------------------------
-- switching delimiters, a long-winded derivation:
-- (1) delim takes 2+2*c_delim bytes, ndelim takes c_ndelim bytes
-- (2) delim becomes c_delim bytes, ndelim becomes 2+2*c_ndelim bytes
-- simplifying the condition (1)>(2) --> c_delim > c_ndelim
if c_delim>c_ndelim then
i=1
while i<=#z do
local p,q,r=find(z,"([\'\"])",i)
if not p then break end
if r==delim then-- \<delim> -> <delim>
z=sub(z,1,p-2)..sub(z,p)
i=p
else-- r == ndelim                -- <ndelim> -> \<ndelim>
z=sub(z,1,p-1).."\\"..sub(z,p)
i=p+2
end
end--while
delim=ndelim-- actually change delimiters
end
--------------------------------------------------------------------
z=delim..z..delim
if z~=sinfos[I]then
if opt_details then
print("<string> (line "..stoklns[I]..") "..sinfos[I].." -> "..z)
opt_details=opt_details+1
end
sinfos[I]=z
end
end

------------------------------------------------------------------------
-- long string optimization
-- * note: warning flagged if trailing whitespace found, not trimmed
-- * remove first optional newline
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
------------------------------------------------------------------------
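-- e.g. (illustrative) [==[hello]==] -> [[hello]] since the body has
-- no "]]"; CRLF/LFCR pairs inside are normalized to a single "\n"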

local function do_lstring(I)
local info=sinfos[I]
local delim1=match(info,"^%[=*%[")-- cut out delimiters
local sep=#delim1
local delim2=sub(info,-sep,-1)
local z=sub(info,sep+1,-(sep+1))-- lstring without delims
local y=""
local i=1
--------------------------------------------------------------------
while true do
local p,q,r,s=find(z,"([\r\n])([\r\n]?)",i)
-- deal with a single line
local ln
if not p then
ln=sub(z,i)
elseif p>=i then
ln=sub(z,i,p-1)
end
if ln~=""then
-- flag a warning if there are trailing spaces, won't optimize!
if match(ln,"%s+$")then
warn.LSTRING="trailing whitespace in long string near line "..stoklns[I]
end
y=y..ln
end
if not p then-- done if no more EOLs
break
end
-- deal with line endings, normalize them
i=p+1
if p then
if#s>0 and r~=s then-- skip CRLF or LFCR
i=i+1
end
-- skip first newline, which can be safely deleted, but only
-- if another EOL does not follow (the relexed output would
-- otherwise eat that one instead)
if p~=1 or match(sub(z,i,i),"[\r\n]")then
y=y.."\n"
end
end
end--while
--------------------------------------------------------------------
-- handle possible deletion of one or more '=' separators
if sep>=3 then
local chk,okay=sep-1
-- loop to test the ending delimiter with fewer '='s, down to zero
while chk>=2 do
local delim="%]"..rep("=",chk-2).."%]"
if not match(y,delim)then okay=chk end
chk=chk-1
end
if okay then-- change delimiters
sep=rep("=",okay-2)
delim1,delim2="["..sep.."[","]"..sep.."]"
end
end
--------------------------------------------------------------------
sinfos[I]=delim1..y..delim2
end

------------------------------------------------------------------------
-- long comment optimization
-- * note: does not remove first optional newline
-- * trim trailing whitespace
-- * normalize embedded newlines
-- * reduce '=' separators in delimiters if possible
------------------------------------------------------------------------
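-- e.g. (illustrative) --[==[ note ]==] -> --[[ note]] since the body
-- has no "]]"; note the trailing whitespace trimmed from the line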

local function do_lcomment(I)
local info=sinfos[I]
local delim1=match(info,"^%-%-%[=*%[")-- cut out delimiters
local sep=#delim1
local delim2=sub(info,-(sep-2),-1)
local z=sub(info,sep+1,-(sep-1))-- comment without delims
local y=""
local i=1
--------------------------------------------------------------------
while true do
local p,q,r,s=find(z,"([\r\n])([\r\n]?)",i)
-- deal with a single line, extract and check trailing whitespace
local ln
if not p then
ln=sub(z,i)
elseif p>=i then
ln=sub(z,i,p-1)
end
if ln~=""then
-- trim trailing whitespace if non-empty line
local ws=match(ln,"%s*$")
if#ws>0 then ln=sub(ln,1,-(#ws+1))end
y=y..ln
end
if not p then-- done if no more EOLs
break
end
-- deal with line endings, normalize them
i=p+1
if p then
if#s>0 and r~=s then-- skip CRLF or LFCR
i=i+1
end
y=y.."\n"
end
end--while
--------------------------------------------------------------------
-- handle possible deletion of one or more '=' separators
sep=sep-2
if sep>=3 then
local chk,okay=sep-1
-- loop to test the ending delimiter with fewer '='s, down to zero
while chk>=2 do
local delim="%]"..rep("=",chk-2).."%]"
if not match(y,delim)then okay=chk end
chk=chk-1
end
if okay then-- change delimiters
sep=rep("=",okay-2)
delim1,delim2="--["..sep.."[","]"..sep.."]"
end
end
--------------------------------------------------------------------
sinfos[I]=delim1..y..delim2
end

------------------------------------------------------------------------
-- short comment optimization
-- * trim trailing whitespace
------------------------------------------------------------------------

local function do_comment(i)
local info=sinfos[i]
local ws=match(info,"%s*$")-- just look from end of string
if#ws>0 then
info=sub(info,1,-(#ws+1))-- trim trailing whitespace
end
sinfos[i]=info
end

------------------------------------------------------------------------
-- returns true if string found in long comment
-- * this is a feature to keep copyright or license texts
------------------------------------------------------------------------
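-- e.g. (illustrative) with the keep option set to "Copyright", a
-- --[[ Copyright (c) ... ]] block survives comment optimization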

local function keep_lcomment(opt_keep,info)
if not opt_keep then return false end-- option not set
local delim1=match(info,"^%-%-%[=*%[")-- cut out delimiters
local sep=#delim1
local delim2=sub(info,-sep,-1)
local z=sub(info,sep+1,-(sep-1))-- comment without delims
if find(z,opt_keep,1,true)then-- try to match
return true
end
end

------------------------------------------------------------------------
-- main entry point
-- * currently, lexer processing has 2 passes
-- * processing is done on a line-oriented basis, which is easier to
--   grok due to the next point...
-- * since there are various options that can be enabled or disabled,
--   processing is a little messy or convoluted
------------------------------------------------------------------------
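-- usage sketch (illustrative only; the main program wires this up):
--   llex.init(source,source_name)
--   llex.llex()
--   local tok,sem,tokln
--   =optlex.optimize(option,llex.tok,llex.seminfo,llex.tokln)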

function optimize(option,toklist,semlist,toklnlist)
--------------------------------------------------------------------
-- set option flags
--------------------------------------------------------------------
local opt_comments=option["opt-comments"]
local opt_whitespace=option["opt-whitespace"]
local opt_emptylines=option["opt-emptylines"]
local opt_eols=option["opt-eols"]
local opt_strings=option["opt-strings"]
local opt_numbers=option["opt-numbers"]
local opt_x=option["opt-experimental"]
local opt_keep=option.KEEP
opt_details=option.DETAILS and 0-- upvalues for details display
print=print or base.print
if opt_eols then-- forced settings, otherwise won't work properly
opt_comments=true
opt_whitespace=true
opt_emptylines=true
elseif opt_x then
opt_whitespace=true
end
--------------------------------------------------------------------
-- variable initialization
--------------------------------------------------------------------
stoks,sinfos,stoklns-- set source lists
=toklist,semlist,toklnlist
local i=1-- token position
local tok,info-- current token
local prev-- position of last grammar token
-- on same line (for TK_SPACE stuff)
--------------------------------------------------------------------
-- changes a token, info pair
--------------------------------------------------------------------
local function settoken(tok,info,I)
I=I or i
stoks[I]=tok or""
sinfos[I]=info or""
end
--------------------------------------------------------------------
-- experimental optimization for ';' operator
--------------------------------------------------------------------
if opt_x then
while true do
tok,info=stoks[i],sinfos[i]
if tok=="TK_EOS"then-- end of stream/pass
break
elseif tok=="TK_OP"and info==";"then
-- ';' operator found, since it is entirely optional, set it
-- as a space to let whitespace optimization do the rest
settoken("TK_SPACE"," ")
end
i=i+1
end
repack_tokens()
end
--------------------------------------------------------------------
-- processing loop (PASS 1)
--------------------------------------------------------------------
i=1
while true do
tok,info=stoks[i],sinfos[i]
----------------------------------------------------------------
local atstart=atlinestart(i)-- set line begin flag
if atstart then prev=nil end
----------------------------------------------------------------
if tok=="TK_EOS"then-- end of stream/pass
break
----------------------------------------------------------------
elseif tok=="TK_KEYWORD"or-- keywords, identifiers,
tok=="TK_NAME"or-- operators
tok=="TK_OP"then
-- TK_KEYWORD and TK_OP can't be optimized without a big
-- optimization framework; it would be more of an optimizing
-- compiler, not a source code compressor
-- TK_NAMEs that are locals need the parser to analyze/optimize
prev=i
----------------------------------------------------------------
elseif tok=="TK_NUMBER"then-- numbers
if opt_numbers then
do_number(i)-- optimize
end
prev=i
----------------------------------------------------------------
elseif tok=="TK_STRING"or-- strings, long strings
tok=="TK_LSTRING"then
if opt_strings then
if tok=="TK_STRING"then
do_string(i)-- optimize
else
do_lstring(i)-- optimize
end
end
prev=i
----------------------------------------------------------------
elseif tok=="TK_COMMENT"then-- short comments
if opt_comments then
if i==1 and sub(info,1,1)=="#"then
-- keep shbang comment, trim whitespace
do_comment(i)
else
-- safe to delete, as a TK_EOL (or TK_EOS) always follows
settoken()-- remove entirely
end
elseif opt_whitespace then-- trim whitespace only
do_comment(i)
end
----------------------------------------------------------------
elseif tok=="TK_LCOMMENT"then-- long comments
if keep_lcomment(opt_keep,info)then
------------------------------------------------------------
-- if --keep, we keep a long comment if <msg> is found;
-- this is a feature to keep copyright or license texts
if opt_whitespace then-- trim whitespace only
do_lcomment(i)
end
prev=i
elseif opt_comments then
local eols=commenteols(info)
------------------------------------------------------------
-- prepare the opt_emptylines case first: if a disposable token
-- follows, the current one is safe to dump, else keep a space;
-- the operation is implied safe for '-', because the current
-- token is a TK_LCOMMENT, which must be separate from any '-'
if is_faketoken[stoks[i+1]]then
settoken()-- remove entirely
tok=""
else
settoken("TK_SPACE"," ")
end
------------------------------------------------------------
-- if there are embedded EOLs to keep and opt_emptylines is
-- disabled, then switch the token into one or more EOLs
if not opt_emptylines and eols>0 then
settoken("TK_EOL",rep("\n",eols))
end
------------------------------------------------------------
-- if optimizing whitespaces, force reinterpretation of the
-- token to give a chance for the space to be optimized away
if opt_whitespace and tok~=""then
i=i-1-- to reinterpret
end
------------------------------------------------------------
else-- disabled case
if opt_whitespace then-- trim whitespace only
do_lcomment(i)
end
prev=i
end
----------------------------------------------------------------
elseif tok=="TK_EOL"then-- line endings
if atstart and opt_emptylines then
settoken()-- remove entirely
elseif info=="\r\n"or info=="\n\r"then
-- normalize the rest of the EOLs for CRLF/LFCR only
-- (note that TK_LCOMMENT can change into several EOLs)
settoken("TK_EOL","\n")
end
----------------------------------------------------------------
elseif tok=="TK_SPACE"then-- whitespace
if opt_whitespace then
if atstart or atlineend(i)then
-- delete leading and trailing whitespace
settoken()-- remove entirely
else
------------------------------------------------------------
-- at this point, since leading whitespace has been removed,
-- there should be either a real token or a TK_LCOMMENT
-- prior to hitting this whitespace; the TK_LCOMMENT case
-- only happens if opt_comments is disabled; so prev ~= nil
local ptok=stoks[prev]
if ptok=="TK_LCOMMENT"then
-- previous TK_LCOMMENT can abut with anything
settoken()-- remove entirely
else
-- prev must be a grammar token; consecutive TK_SPACE
-- tokens are impossible when optimizing whitespace
local ntok=stoks[i+1]
if is_faketoken[ntok]then
-- handle special case where a '-' cannot abut with
-- either a short comment or a long comment
if(ntok=="TK_COMMENT"or ntok=="TK_LCOMMENT")and
ptok=="TK_OP"and sinfos[prev]=="-"then
-- keep token
else
settoken()-- remove entirely
end
else--is_realtoken
-- check a pair of grammar tokens, if can abut, then
-- delete space token entirely, otherwise keep one space
local s=checkpair(prev,i+1)
if s==""then
settoken()-- remove entirely
else
settoken("TK_SPACE"," ")
end
end
end
------------------------------------------------------------
end
end
----------------------------------------------------------------
else
error("unidentified token encountered")
end
----------------------------------------------------------------
i=i+1
end--while
repack_tokens()
--------------------------------------------------------------------
-- processing loop (PASS 2)
--------------------------------------------------------------------
if opt_eols then
i=1
-- aggressive EOL removal only works with most non-grammar tokens
-- optimized away because it is a rather simple scheme -- basically
-- it just checks 'real' token pairs around EOLs
if stoks[1]=="TK_COMMENT"then
-- first comment still existing must be shbang, skip whole line
i=3
end
while true do
tok,info=stoks[i],sinfos[i]
--------------------------------------------------------------
if tok=="TK_EOS"then-- end of stream/pass
break
--------------------------------------------------------------
elseif tok=="TK_EOL"then-- consider each TK_EOL
local t1,t2=stoks[i-1],stoks[i+1]
if is_realtoken[t1]and is_realtoken[t2]then-- sanity check
local s=checkpair(i-1,i+1)
if s==""or t2=="TK_EOS"then
settoken()-- remove entirely
end
end
end--if tok
--------------------------------------------------------------
i=i+1
end--while
repack_tokens()
end
--------------------------------------------------------------------
if opt_details and opt_details>0 then print()end-- spacing
return stoks,sinfos,stoklns
end
--end of inserted module
end

-- preload function for module optparser
preload.optparser=
function()
--start of inserted module
module"optparser"

local string=base.require"string"
local table=base.require"table"

----------------------------------------------------------------------
-- Letter frequencies for reducing symbol entropy (fixed version)
-- * Might help a wee bit when the output file is compressed
-- * See Wikipedia: http://en.wikipedia.org/wiki/Letter_frequencies
-- * We use letter frequencies according to a Linotype keyboard, plus
--   the underscore, and both lower case and upper case letters.
-- * The arrangement below (LC, underscore, %d, UC) is arbitrary.
-- * This is certainly not optimal, but is quick-and-dirty and the
--   process has no significant overhead
----------------------------------------------------------------------

local LETTERS="etaoinshrdlucmfwypvbgkqjxz_ETAOINSHRDLUCMFWYPVBGKQJXZ"
local ALPHANUM="etaoinshrdlucmfwypvbgkqjxz_0123456789ETAOINSHRDLUCMFWYPVBGKQJXZ"

-- names or identifiers that must be skipped
-- * the first two lines are for keywords
local SKIP_NAME={}
for v in string.gmatch([[
and break do else elseif end false for function if in
local nil not or repeat return then true until while
self]],"%S+")do
SKIP_NAME[v]=true
end

------------------------------------------------------------------------
-- variables and data structures
------------------------------------------------------------------------

local toklist,seminfolist,-- token lists (lexer output)
tokpar,seminfopar,xrefpar,-- token lists (parser output)
globalinfo,localinfo,-- variable information tables
statinfo,-- statement type table
globaluniq,localuniq,-- unique name tables
var_new,-- index of new variable names
varlist-- list of output variables

----------------------------------------------------------------------
-- preprocess information table to get lists of unique names
----------------------------------------------------------------------

local function preprocess(infotable)
local uniqtable={}
for i=1,#infotable do-- enumerate info table
local obj=infotable[i]
local name=obj.name
--------------------------------------------------------------------
if not uniqtable[name]then-- not found, start an entry
uniqtable[name]={
decl=0,token=0,size=0,
}
end
--------------------------------------------------------------------
local uniq=uniqtable[name]-- count declarations, tokens, size
uniq.decl=uniq.decl+1
local xref=obj.xref
local xcount=#xref
uniq.token=uniq.token+xcount
uniq.size=uniq.size+xcount*#name
--------------------------------------------------------------------
if obj.decl then-- if local table, create first,last pairs
obj.id=i
obj.xcount=xcount
if xcount>1 then-- if ==1, means local never accessed
obj.first=xref[2]
obj.last=xref[xcount]
end
--------------------------------------------------------------------
else-- if global table, add a back ref
uniq.id=i
end
--------------------------------------------------------------------
end--for
return uniqtable
end

----------------------------------------------------------------------
-- calculate actual symbol frequencies, in order to reduce entropy
-- * this may help further reduce the size of compressed sources
-- * note that since parsing optimizations are done before lexing
--   optimizations, the frequency table is not exact!
-- * yes, this will miss --keep block comments too...
----------------------------------------------------------------------
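-- e.g. (illustrative) if 'n' turns out to be the most frequent byte
-- among the accepted tokens, resorting moves it to the front of
-- LETTERS so that the name allocator hands it out first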

local function recalc_for_entropy(option)
local byte=string.byte
local char=string.char
-- table of token classes to accept in calculating symbol frequency
local ACCEPT={
TK_KEYWORD=true,TK_NAME=true,TK_NUMBER=true,
TK_STRING=true,TK_LSTRING=true,
}
if not option["opt-comments"]then
ACCEPT.TK_COMMENT=true
ACCEPT.TK_LCOMMENT=true
end
--------------------------------------------------------------------
-- create a new table and remove any original locals by filtering
--------------------------------------------------------------------
local filtered={}
for i=1,#toklist do
filtered[i]=seminfolist[i]
end
for i=1,#localinfo do-- enumerate local info table
local obj=localinfo[i]
local xref=obj.xref
for j=1,obj.xcount do
local p=xref[j]
filtered[p]=""-- remove locals
end
end
--------------------------------------------------------------------
local freq={}-- reset symbol frequency table
for i=0,255 do freq[i]=0 end
for i=1,#toklist do-- gather symbol frequency
local tok,info=toklist[i],filtered[i]
if ACCEPT[tok]then
for j=1,#info do
local c=byte(info,j)
freq[c]=freq[c]+1
end
end--if
end--for
--------------------------------------------------------------------
-- function to re-sort symbols according to actual frequencies
--------------------------------------------------------------------
local function resort(symbols)
local symlist={}
for i=1,#symbols do-- prepare table to sort
local c=byte(symbols,i)
symlist[i]={c=c,freq=freq[c],}
end
table.sort(symlist,-- sort selected symbols
function(v1,v2)
return v1.freq>v2.freq
end
)
local charlist={}-- reconstitute the string
for i=1,#symlist do
charlist[i]=char(symlist[i].c)
end
return table.concat(charlist)
end
--------------------------------------------------------------------
LETTERS=resort(LETTERS)-- change letter arrangement
ALPHANUM=resort(ALPHANUM)
end

----------------------------------------------------------------------
-- returns a string containing a new local variable name to use, and
-- a flag indicating whether it collides with a global variable
-- * trapping keywords and other names like 'self' is done elsewhere
----------------------------------------------------------------------
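-- allocation order (illustrative, with the default LETTERS): the 53
-- single-char names e,t,a,o,... are used first, then two-char names
-- ee,te,ae,... where the first character cycles fastest, and so on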

local function new_var_name()
local var
local cletters,calphanum=#LETTERS,#ALPHANUM
local v=var_new
if v<cletters then-- single char
v=v+1
var=string.sub(LETTERS,v,v)
else-- longer names
local range,sz=cletters,1-- calculate # chars fit
repeat
v=v-range
range=range*calphanum
sz=sz+1
until range>v
local n=v%cletters-- left side cycles faster
v=(v-n)/cletters-- do first char first
n=n+1
var=string.sub(LETTERS,n,n)
while sz>1 do
local m=v%calphanum
v=(v-m)/calphanum
m=m+1
var=var..string.sub(ALPHANUM,m,m)
sz=sz-1
end
end
var_new=var_new+1
return var,globaluniq[var]~=nil
end

----------------------------------------------------------------------
-- calculate and print some statistics
-- * probably better in main source, put here for now
----------------------------------------------------------------------

local function stats_summary(globaluniq,localuniq,afteruniq,option)
local print=print or base.print
local fmt=string.format
local opt_details=option.DETAILS
if option.QUIET then return end
local uniq_g,uniq_li,uniq_lo,uniq_ti,uniq_to,-- stats needed
decl_g,decl_li,decl_lo,decl_ti,decl_to,
token_g,token_li,token_lo,token_ti,token_to,
size_g,size_li,size_lo,size_ti,size_to
=0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0
local function avg(c,l)-- safe average function
if c==0 then return 0 end
return l/c
end
--------------------------------------------------------------------
-- collect statistics (note: globals do not have declarations!)
--------------------------------------------------------------------
for name,uniq in base.pairs(globaluniq)do
uniq_g=uniq_g+1
token_g=token_g+uniq.token
size_g=size_g+uniq.size
end
for name,uniq in base.pairs(localuniq)do
uniq_li=uniq_li+1
decl_li=decl_li+uniq.decl
token_li=token_li+uniq.token
size_li=size_li+uniq.size
end
for name,uniq in base.pairs(afteruniq)do
uniq_lo=uniq_lo+1
decl_lo=decl_lo+uniq.decl
token_lo=token_lo+uniq.token
size_lo=size_lo+uniq.size
end
uniq_ti=uniq_g+uniq_li
decl_ti=decl_g+decl_li
token_ti=token_g+token_li
size_ti=size_g+size_li
uniq_to=uniq_g+uniq_lo
decl_to=decl_g+decl_lo
token_to=token_g+token_lo
size_to=size_g+size_lo
--------------------------------------------------------------------
-- detailed stats: global list
--------------------------------------------------------------------
if opt_details then
local sorted={}-- sort table of unique global names by size
for name,uniq in base.pairs(globaluniq)do
uniq.name=name
sorted[#sorted+1]=uniq
end
table.sort(sorted,
function(v1,v2)
return v1.size>v2.size
end
)
local tabf1,tabf2="%8s%8s%10s  %s","%8d%8d%10.2f  %s"
local hl=string.rep("-",44)
print("*** global variable list (sorted by size) ***\n"..hl)
print(fmt(tabf1,"Token","Input","Input","Global"))
print(fmt(tabf1,"Count","Bytes","Average","Name"))
print(hl)
for i=1,#sorted do
local uniq=sorted[i]
print(fmt(tabf2,uniq.token,uniq.size,avg(uniq.token,uniq.size),uniq.name))
end
print(hl)
print(fmt(tabf2,token_g,size_g,avg(token_g,size_g),"TOTAL"))
print(hl.."\n")
--------------------------------------------------------------------
-- detailed stats: local list
--------------------------------------------------------------------
local tabf1,tabf2="%8s%8s%8s%10s%8s%10s  %s","%8d%8d%8d%10.2f%8d%10.2f  %s"
local hl=string.rep("-",70)
print("*** local variable list (sorted by allocation order) ***\n"..hl)
print(fmt(tabf1,"Decl.","Token","Input","Input","Output","Output","Global"))
print(fmt(tabf1,"Count","Count","Bytes","Average","Bytes","Average","Name"))
print(hl)
for i=1,#varlist do-- iterate according to order assigned
local name=varlist[i]
local uniq=afteruniq[name]
local old_t,old_s=0,0
for j=1,#localinfo do-- find corresponding old names and calculate
local obj=localinfo[j]
if obj.name==name then
old_t=old_t+obj.xcount
old_s=old_s+obj.xcount*#obj.oldname
end
end
print(fmt(tabf2,uniq.decl,uniq.token,old_s,avg(old_t,old_s),
uniq.size,avg(uniq.token,uniq.size),name))
end
print(hl)
print(fmt(tabf2,decl_lo,token_lo,size_li,avg(token_li,size_li),
size_lo,avg(token_lo,size_lo),"TOTAL"))
print(hl.."\n")
end--if opt_details
--------------------------------------------------------------------
-- display output
--------------------------------------------------------------------
local tabf1,tabf2="%-16s%8s%8s%8s%8s%10s","%-16s%8d%8d%8d%8d%10.2f"
local hl=string.rep("-",58)
print("*** local variable optimization summary ***\n"..hl)
print(fmt(tabf1,"Variable","Unique","Decl.","Token","Size","Average"))
print(fmt(tabf1,"Types","Names","Count","Count","Bytes","Bytes"))
print(hl)
print(fmt(tabf2,"Global",uniq_g,decl_g,token_g,size_g,avg(token_g,size_g)))
print(hl)
print(fmt(tabf2,"Local (in)",uniq_li,decl_li,token_li,size_li,avg(token_li,size_li)))
print(fmt(tabf2,"TOTAL (in)",uniq_ti,decl_ti,token_ti,size_ti,avg(token_ti,size_ti)))
print(hl)
print(fmt(tabf2,"Local (out)",uniq_lo,decl_lo,token_lo,size_lo,avg(token_lo,size_lo)))
print(fmt(tabf2,"TOTAL (out)",uniq_to,decl_to,token_to,size_to,avg(token_to,size_to)))
print(hl.."\n")
end

----------------------------------------------------------------------
-- experimental optimization for f("string") statements
-- * safe to delete parentheses without adding whitespace, as both
--   kinds of strings can abut with anything else
----------------------------------------------------------------------
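-- e.g. (illustrative) print("hello") -> print"hello"
--                     f([[data]])    -> f[[data]]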

local function optimize_func1()
------------------------------------------------------------------
local function is_strcall(j)-- find f("string") pattern
local t1=tokpar[j+1]or""
local t2=tokpar[j+2]or""
local t3=tokpar[j+3]or""
if t1=="("and t2=="<string>"and t3==")"then
return true
end
end
------------------------------------------------------------------
local del_list={}-- scan for function pattern,
local i=1-- tokens to be deleted are marked
while i<=#tokpar do
local id=statinfo[i]
if id=="call"and is_strcall(i)then-- found & mark ()
del_list[i+1]=true-- '('
del_list[i+3]=true-- ')'
i=i+3
end
i=i+1
end
------------------------------------------------------------------
-- delete a token and adjust all relevant tables
-- * currently invalidates globalinfo and localinfo (not updated),
--   so any other optimization is done after processing locals
--   (of course, we can also lex the source data again...)
-- * faster one-pass token deletion
------------------------------------------------------------------
local i,dst,idend=1,1,#tokpar
local del_list2={}
while dst<=idend do-- process parser tables
if del_list[i]then-- found a token to delete?
del_list2[xrefpar[i]]=true
i=i+1
end
if i>dst then
if i<=idend then-- shift table items lower
tokpar[dst]=tokpar[i]
seminfopar[dst]=seminfopar[i]
xrefpar[dst]=xrefpar[i]-(i-dst)
statinfo[dst]=statinfo[i]
else-- nil out excess entries
tokpar[dst]=nil
seminfopar[dst]=nil
xrefpar[dst]=nil
statinfo[dst]=nil
end
end
i=i+1
dst=dst+1
end
local i,dst,idend=1,1,#toklist
while dst<=idend do-- process lexer tables
if del_list2[i]then-- found a token to delete?
i=i+1
end
if i>dst then
if i<=idend then-- shift table items lower
toklist[dst]=toklist[i]
seminfolist[dst]=seminfolist[i]
else-- nil out excess entries
toklist[dst]=nil
seminfolist[dst]=nil
end
end
i=i+1
dst=dst+1
end
end

----------------------------------------------------------------------
-- local variable optimization
----------------------------------------------------------------------
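-- e.g. (illustrative) local counter=1 ... counter=counter+1 may
-- become local e=1 ... e=e+1; actual names depend on token counts,
-- the allocation order and any collisions with globals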

local function optimize_locals(option)
var_new=0-- reset variable name allocator
varlist={}
------------------------------------------------------------------
-- preprocess global/local tables, handle entropy reduction
------------------------------------------------------------------
globaluniq=preprocess(globalinfo)
localuniq=preprocess(localinfo)
if option["opt-entropy"]then-- for entropy improvement
recalc_for_entropy(option)
end
------------------------------------------------------------------
-- build initial declared object table, then sort according to
-- token count, this might help assign more tokens to more common
-- variable names such as 'e' thus possibly reducing entropy
-- * an object knows its localinfo index via its 'id' field
-- * special handling for "self" special local (parameter) here
------------------------------------------------------------------
local object={}
for i=1,#localinfo do
object[i]=localinfo[i]
end
table.sort(object,-- sort largest first
function(v1,v2)
return v1.xcount>v2.xcount
end
)
------------------------------------------------------------------
-- the special "self" function parameters must be preserved
-- * the allocator below will never use "self", so it is safe to
--   keep those implicit declarations as-is
------------------------------------------------------------------
local temp,j,gotself={},1,false
for i=1,#object do
local obj=object[i]
if not obj.isself then
temp[j]=obj
j=j+1
else
gotself=true
end
end
object=temp
------------------------------------------------------------------
-- a simple first-come first-served heuristic name allocator,
-- note that this is in no way optimal...
-- * each object is a local variable declaration plus its existence
--   (live) range
-- * the aim is to assign short names to as many tokens as possible,
--   so the following tries to maximize name reuse
-- * note that we preserve sort order
------------------------------------------------------------------
local nobject=#object
while nobject>0 do
local varname,gcollide
repeat
varname,gcollide=new_var_name()-- collect a variable name
until not SKIP_NAME[varname]-- skip all special names
varlist[#varlist+1]=varname-- keep a list
local oleft=nobject
------------------------------------------------------------------
-- if variable name collides with an existing global, the name
-- cannot be used by a local if the name is accessed as a global
-- while the local is alive (between 'act' and 'rem'), so
-- we drop objects that collide with the corresponding global
------------------------------------------------------------------
if gcollide then
-- find the xref table of the global
local gref=globalinfo[globaluniq[varname].id].xref
local ngref=#gref
-- enumerate for all current objects; all are valid at this point
for i=1,nobject do
local obj=object[i]
local act,rem=obj.act,obj.rem-- 'live' range of local
-- if rem < 0, it is a -id to a local that had the same name
-- so follow rem to extend it; does this make sense?
while rem<0 do
rem=localinfo[-rem].rem
end
local drop
for j=1,ngref do
local p=gref[j]
if p>=act and p<=rem then drop=true end-- in range?
end
if drop then
obj.skip=true
oleft=oleft-1
end
end--for
end--if gcollide
------------------------------------------------------------------
-- now the first unassigned local (since it's sorted) will be the
-- one with the most tokens to rename, so we set this one and then
-- eliminate all others that collide; any locals that are left
-- can then reuse the same variable name; this is repeated until
-- every local declaration that can use this name is assigned
-- * the criteria for local-local reuse/collision is:
--   A is the local with a name already assigned
--   B is the unassigned local under consideration
--   => anytime A is accessed, it cannot be when B is 'live'
--   => to speed up things, we have first/last accesses noted
------------------------------------------------------------------
while oleft>0 do
local i=1
while object[i].skip do-- scan for first object
i=i+1
end
------------------------------------------------------------------
-- first object is free for assignment of the variable name
-- [first,last] gives the access range for collision checking
------------------------------------------------------------------
oleft=oleft-1
local obja=object[i]
i=i+1
obja.newname=varname
obja.skip=true
obja.done=true
local first,last=obja.first,obja.last
local xref=obja.xref
------------------------------------------------------------------
-- then, scan all the rest and drop those colliding
-- if A was never accessed then it'll never collide with anything
-- otherwise trivial skip if:
-- * B was activated after A's last access (last < act)
-- * B was removed before A's first access (first > rem)
-- if not, see detailed skip below...
------------------------------------------------------------------
if first and oleft>0 then-- must have at least 1 access
local scanleft=oleft
while scanleft>0 do
while object[i].skip do-- next valid object
i=i+1
end
scanleft=scanleft-1
local objb=object[i]
i=i+1
local act,rem=objb.act,objb.rem-- live range of B
-- if rem < 0, extend range of rem thru' following local
while rem<0 do
rem=localinfo[-rem].rem
end
--------------------------------------------------------
if not(last<act or first>rem)then-- possible collision
--------------------------------------------------------
-- B is activated later than A, or at the same statement; for
-- no collision, A cannot be accessed while B is alive, since
-- B overrides A (or is a peer)
--------------------------------------------------------
if act>=obja.act then
for j=1,obja.xcount do-- ... then check every access
local p=xref[j]
if p>=act and p<=rem then-- A accessed when B live!
oleft=oleft-1
objb.skip=true
break
end
end--for
--------------------------------------------------------
-- A is activated later than B; this means, for no collision,
-- A's accesses are okay since it overrides B, but B's last
-- access needs to be earlier than A's activation time
--------------------------------------------------------
else
if objb.last and objb.last>=obja.act then
oleft=oleft-1
objb.skip=true
end
end
end
--------------------------------------------------------
if oleft==0 then break end
end
end--if first
------------------------------------------------------------------
end--while
------------------------------------------------------------------
-- after assigning all possible locals to one variable name, the
-- unassigned locals/objects have the skip field reset and the table
-- is compacted, to hopefully reduce iteration time
------------------------------------------------------------------
local temp,j={},1
for i=1,nobject do
local obj=object[i]
if not obj.done then
obj.skip=false
temp[j]=obj
j=j+1
end
end
object=temp-- new compacted object table
nobject=#object-- objects left to process
------------------------------------------------------------------
end--while
------------------------------------------------------------------
-- after assigning all locals with new variable names, we can
-- patch in the new names, and reprocess to get 'after' stats
------------------------------------------------------------------
for i=1,#localinfo do-- enumerate all locals
local obj=localinfo[i]
local xref=obj.xref
if obj.newname then-- if got new name, patch it in
for j=1,obj.xcount do
local p=xref[j]-- xrefs indexes the token list
seminfolist[p]=obj.newname
end
obj.name,obj.oldname-- adjust names
=obj.newname,obj.name
else
obj.oldname=obj.name-- for cases like 'self'
end
end
------------------------------------------------------------------
-- deal with statistics output
------------------------------------------------------------------
if gotself then-- add 'self' to end of list
varlist[#varlist+1]="self"
end
local afteruniq=preprocess(localinfo)
stats_summary(globaluniq,localuniq,afteruniq,option)
end


----------------------------------------------------------------------
-- main entry point
----------------------------------------------------------------------

function optimize(option,_toklist,_seminfolist,xinfo)
-- set tables
toklist,seminfolist-- from lexer
=_toklist,_seminfolist
tokpar,seminfopar,xrefpar-- from parser
=xinfo.toklist,xinfo.seminfolist,xinfo.xreflist
globalinfo,localinfo,statinfo-- from parser
=xinfo.globalinfo,xinfo.localinfo,xinfo.statinfo
------------------------------------------------------------------
-- optimize locals
------------------------------------------------------------------
if option["opt-locals"]then
optimize_locals(option)
end
------------------------------------------------------------------
-- other optimizations
------------------------------------------------------------------
if option["opt-experimental"]then-- experimental
optimize_func1()
-- WARNING globalinfo and localinfo now invalidated!
end
end
--end of inserted module
end

-- preload function for module equiv
preload.equiv=
function()
--start of inserted module
module"equiv"

local string=base.require"string"
local loadstring=base.loadstring
local sub=string.sub
local match=string.match
local dump=string.dump
local byte=string.byte

--[[--------------------------------------------------------------------
-- variable and data initialization
----------------------------------------------------------------------]]

local is_realtoken={-- significant (grammar) tokens
TK_KEYWORD=true,
TK_NAME=true,
TK_NUMBER=true,
TK_STRING=true,
TK_LSTRING=true,
TK_OP=true,
TK_EOS=true,
}

local option,llex,warn

--[[--------------------------------------------------------------------
-- functions
----------------------------------------------------------------------]]

------------------------------------------------------------------------
-- initialization function
------------------------------------------------------------------------

function init(_option,_llex,_warn)
option=_option
llex=_llex
warn=_warn
end

------------------------------------------------------------------------
-- function to build lists containing a 'normal' lexer stream
------------------------------------------------------------------------

local function build_stream(s)
llex.init(s)
llex.llex()
local stok,sseminfo-- source list (with whitespace elements)
=llex.tok,llex.seminfo
local tok,seminfo-- processed list (real elements only)
={},{}
for i=1,#stok do
local t=stok[i]
if is_realtoken[t]then
tok[#tok+1]=t
seminfo[#seminfo+1]=sseminfo[i]
end
end--for
return tok,seminfo
end

------------------------------------------------------------------------
-- test source (lexer stream) equivalence
------------------------------------------------------------------------

function source(z,dat)
--------------------------------------------------------------------
-- function to return a dumped string for seminfo compares
--------------------------------------------------------------------
local function dumpsem(s)
local sf=loadstring("return "..s,"z")
if sf then
return dump(sf)
end
end
--------------------------------------------------------------------
-- mark and optionally report non-equivalence
--------------------------------------------------------------------
local function bork(msg)
if option.DETAILS then base.print("SRCEQUIV: "..msg)end
warn.SRC_EQUIV=true
end
--------------------------------------------------------------------
-- get lexer streams for both source strings, compare
--------------------------------------------------------------------
local tok1,seminfo1=build_stream(z)-- original
local tok2,seminfo2=build_stream(dat)-- compressed
--------------------------------------------------------------------
-- compare shbang lines ignoring EOL
--------------------------------------------------------------------
local sh1=match(z,"^(#[^\r\n]*)")
local sh2=match(dat,"^(#[^\r\n]*)")
if sh1 or sh2 then
if not sh1 or not sh2 or sh1~=sh2 then
bork("shbang lines different")
end
end
--------------------------------------------------------------------
-- compare by simple count
--------------------------------------------------------------------
if#tok1~=#tok2 then
bork("count "..#tok1.." "..#tok2)
return
end
--------------------------------------------------------------------
-- compare each element the best we can
--------------------------------------------------------------------
for i=1,#tok1 do
local t1,t2=tok1[i],tok2[i]
local s1,s2=seminfo1[i],seminfo2[i]
if t1~=t2 then-- by type
bork("type ["..i.."] "..t1.." "..t2)
break
end
if t1=="TK_KEYWORD"or t1=="TK_NAME"or t1=="TK_OP"then
if t1=="TK_NAME"and option["opt-locals"]then
-- can't compare identifiers of locals that are optimized
elseif s1~=s2 then-- by semantic info (simple)
bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2)
break
end
elseif t1=="TK_EOS"then
-- no seminfo to compare
else-- "TK_NUMBER" or "TK_STRING" or "TK_LSTRING"
-- compare 'binary' form, so dump a function
local s1b,s2b=dumpsem(s1),dumpsem(s2)
if not s1b or not s2b or s1b~=s2b then
bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2)
break
end
end
end--for
--------------------------------------------------------------------
-- successful comparison if end is reached with no borks
--------------------------------------------------------------------
end

------------------------------------------------------------------------
-- test binary chunk equivalence
------------------------------------------------------------------------

function binary(z,dat)
local TNIL=0
local TBOOLEAN=1
local TNUMBER=3
local TSTRING=4
--------------------------------------------------------------------
-- mark and optionally report non-equivalence
--------------------------------------------------------------------
local function bork(msg)
if option.DETAILS then base.print("BINEQUIV: "..msg)end
warn.BIN_EQUIV=true
end
--------------------------------------------------------------------
-- function to remove shbang line so that loadstring runs
--------------------------------------------------------------------
local function zap_shbang(s)
local shbang=match(s,"^(#[^\r\n]*\r?\n?)")
if shbang then-- cut out shbang
s=sub(s,#shbang+1)
end
return s
end
--------------------------------------------------------------------
-- attempt to compile, then dump to get binary chunk string
--------------------------------------------------------------------
local cz=loadstring(zap_shbang(z),"z")
if not cz then
bork("failed to compile original sources for binary chunk comparison")
return
end
local cdat=loadstring(zap_shbang(dat),"z")
if not cdat then
bork("failed to compile compressed result for binary chunk comparison")
return
end
-- if loadstring() works, dump assuming string.dump() is error-free
local c1={i=1,dat=dump(cz)}
c1.len=#c1.dat
local c2={i=1,dat=dump(cdat)}
c2.len=#c2.dat
--------------------------------------------------------------------
-- support functions to handle binary chunk reading
--------------------------------------------------------------------
local endian,
sz_int,sz_sizet,-- sizes of data types
sz_inst,sz_number,
getint,getsizet
--------------------------------------------------------------------
local function ensure(c,sz)-- check if bytes exist
if c.i+sz-1>c.len then return end
return true
end
--------------------------------------------------------------------
local function skip(c,sz)-- skip some bytes
if not sz then sz=1 end
c.i=c.i+sz
end
--------------------------------------------------------------------
local function getbyte(c)-- return a byte value
local i=c.i
if i>c.len then return end
local d=sub(c.dat,i,i)
c.i=i+1
return byte(d)
end
--------------------------------------------------------------------
local function getint_l(c)-- return an int value (little-endian)
local n,scale=0,1
if not ensure(c,sz_int)then return end
for j=1,sz_int do
n=n+scale*getbyte(c)
scale=scale*256
end
return n
end
--------------------------------------------------------------------
local function getint_b(c)-- return an int value (big-endian)
local n=0
if not ensure(c,sz_int)then return end
for j=1,sz_int do
n=n*256+getbyte(c)
end
return n
end
--------------------------------------------------------------------
local function getsizet_l(c)-- return a size_t value (little-endian)
local n,scale=0,1
if not ensure(c,sz_sizet)then return end
for j=1,sz_sizet do
n=n+scale*getbyte(c)
scale=scale*256
end
return n
end
--------------------------------------------------------------------
local function getsizet_b(c)-- return a size_t value (big-endian)
local n=0
if not ensure(c,sz_sizet)then return end
for j=1,sz_sizet do
n=n*256+getbyte(c)
end
return n
end
--------------------------------------------------------------------
local function getblock(c,sz)-- return a block (as a string)
local i=c.i
local j=i+sz-1
if j>c.len then return end
local d=sub(c.dat,i,j)
c.i=i+sz
return d
end
--------------------------------------------------------------------
local function getstring(c)-- return a string
local n=getsizet(c)
if not n then return end
if n==0 then return""end
return getblock(c,n)
end
--------------------------------------------------------------------
local function goodbyte(c1,c2)-- compare byte value
local b1,b2=getbyte(c1),getbyte(c2)
if not b1 or not b2 or b1~=b2 then
return
end
return b1
end
--------------------------------------------------------------------
local function badbyte(c1,c2)-- compare byte value
local b=goodbyte(c1,c2)
if not b then return true end
end
--------------------------------------------------------------------
local function goodint(c1,c2)-- compare int value
local i1,i2=getint(c1),getint(c2)
if not i1 or not i2 or i1~=i2 then
return
end
return i1
end
--------------------------------------------------------------------
-- recursively-called function to compare function prototypes
--------------------------------------------------------------------
local function getfunc(c1,c2)
-- source name (ignored)
if not getstring(c1)or not getstring(c2)then
bork("bad source name");return
end
-- linedefined (ignored)
if not getint(c1)or not getint(c2)then
bork("bad linedefined");return
end
-- lastlinedefined (ignored)
if not getint(c1)or not getint(c2)then
bork("bad lastlinedefined");return
end
if not(ensure(c1,4)and ensure(c2,4))then
bork("prototype header broken")
end
-- nups (compared)
if badbyte(c1,c2)then
bork("bad nups");return
end
-- numparams (compared)
if badbyte(c1,c2)then
bork("bad numparams");return
end
-- is_vararg (compared)
if badbyte(c1,c2)then
bork("bad is_vararg");return
end
-- maxstacksize (compared)
if badbyte(c1,c2)then
bork("bad maxstacksize");return
end
-- code (compared)
local ncode=goodint(c1,c2)
if not ncode then
bork("bad ncode");return
end
local code1=getblock(c1,ncode*sz_inst)
local code2=getblock(c2,ncode*sz_inst)
if not code1 or not code2 or code1~=code2 then
bork("bad code block");return
end
-- constants (compared)
local nconst=goodint(c1,c2)
if not nconst then
bork("bad nconst");return
end
for i=1,nconst do
local ctype=goodbyte(c1,c2)
if not ctype then
bork("bad const type");return
end
if ctype==TBOOLEAN then
if badbyte(c1,c2)then
bork("bad boolean value");return
end
elseif ctype==TNUMBER then
local num1=getblock(c1,sz_number)
local num2=getblock(c2,sz_number)
if not num1 or not num2 or num1~=num2 then
bork("bad number value");return
end
elseif ctype==TSTRING then
local str1=getstring(c1)
local str2=getstring(c2)
if not str1 or not str2 or str1~=str2 then
bork("bad string value");return
end
end
end
-- prototypes (compared recursively)
local nproto=goodint(c1,c2)
if not nproto then
bork("bad nproto");return
end
for i=1,nproto do
if not getfunc(c1,c2)then
bork("bad function prototype");return
end
end
-- debug information (ignored)
-- lineinfo (ignored)
local sizelineinfo1=getint(c1)
if not sizelineinfo1 then
bork("bad sizelineinfo1");return
end
local sizelineinfo2=getint(c2)
if not sizelineinfo2 then
bork("bad sizelineinfo2");return
end
if not getblock(c1,sizelineinfo1*sz_int)then
bork("bad lineinfo1");return
end
if not getblock(c2,sizelineinfo2*sz_int)then
bork("bad lineinfo2");return
end
-- locvars (ignored)
local sizelocvars1=getint(c1)
if not sizelocvars1 then
bork("bad sizelocvars1");return
end
local sizelocvars2=getint(c2)
if not sizelocvars2 then
bork("bad sizelocvars2");return
end
for i=1,sizelocvars1 do
if not getstring(c1)or not getint(c1)or not getint(c1)then
bork("bad locvars1");return
end
end
for i=1,sizelocvars2 do
if not getstring(c2)or not getint(c2)or not getint(c2)then
bork("bad locvars2");return
end
end
-- upvalues (ignored)
local sizeupvalues1=getint(c1)
if not sizeupvalues1 then
bork("bad sizeupvalues1");return
end
local sizeupvalues2=getint(c2)
if not sizeupvalues2 then
bork("bad sizeupvalues2");return
end
for i=1,sizeupvalues1 do
if not getstring(c1)then bork("bad upvalues1");return end
end
for i=1,sizeupvalues2 do
if not getstring(c2)then bork("bad upvalues2");return end
end
return true
end
--------------------------------------------------------------------
-- parse binary chunks to verify equivalence
-- * for headers, handle sizes to allow a degree of flexibility
-- * assume a valid binary chunk is generated, since it was not
--   generated via external means
--------------------------------------------------------------------
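-- for reference, a Lua 5.1 binary chunk header is 12 bytes:
-- "\27Lua" signature(4), version(0x51), format, endianness,
-- sizeof(int), sizeof(size_t), sizeof(Instruction),
-- sizeof(lua_Number), and the integral flag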
if not(ensure(c1,12)and ensure(c2,12))then
bork("header broken")
end
skip(c1,6)-- skip signature(4), version, format
endian=getbyte(c1)-- 1 = little endian
sz_int=getbyte(c1)-- get data type sizes
sz_sizet=getbyte(c1)
sz_inst=getbyte(c1)
sz_number=getbyte(c1)
skip(c1)-- skip integral flag
skip(c2,12)-- skip other header (assume similar)
if endian==1 then-- set for endian sensitive data we need
getint=getint_l
getsizet=getsizet_l
else
getint=getint_b
getsizet=getsizet_b
end
getfunc(c1,c2)-- get prototype at root
if c1.i~=c1.len+1 then
bork("inconsistent binary chunk1");return
elseif c2.i~=c2.len+1 then
bork("inconsistent binary chunk2");return
end
--------------------------------------------------------------------
-- successful comparison if end is reached with no borks
--------------------------------------------------------------------
end
--end of inserted module
end

-- preload function for module plugin/html
preload["plugin/html"]=
function()
--start of inserted module
module"plugin/html"

local string=base.require"string"
local table=base.require"table"
local io=base.require"io"

------------------------------------------------------------------------
-- constants and configuration
------------------------------------------------------------------------

local HTML_EXT=".html"
local ENTITIES={
["&"]="&amp;",["<"]="&lt;",[">"]="&gt;",
["'"]="&apos;",["\""]="&quot;",
}

-- simple headers and footers
local HEADER=[[
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>%s</title>
<meta name="Generator" content="LuaSrcDiet">
<style type="text/css">
%s</style>
</head>
<body>
<pre class="code">
]]
local FOOTER=[[
</pre>
</body>
</html>
]]
-- for more, please see wikimain.css from the Lua wiki site
local STYLESHEET=[[
BODY {
    background: white;
    color: navy;
}
pre.code { color: black; }
span.comment { color: #00a000; }
span.string  { color: #009090; }
span.keyword { color: black; font-weight: bold; }
span.number { color: #993399; }
span.operator { }
span.name { }
span.global { color: #ff0000; font-weight: bold; }
span.local { color: #0000ff; font-weight: bold; }
]]

------------------------------------------------------------------------
-- option handling, plays nice with --quiet option
------------------------------------------------------------------------

local option-- local reference to list of options
local srcfl,destfl-- filenames
local toklist,seminfolist,toklnlist-- token data

local function print(...)-- handle quiet option
if option.QUIET then return end
base.print(...)
end

------------------------------------------------------------------------
-- initialization
------------------------------------------------------------------------

function init(_option,_srcfl,_destfl)
option=_option
srcfl=_srcfl
local extb,exte=string.find(srcfl,"%.[^%.%\\%/]*$")
local basename,extension=srcfl,""
if extb and extb>1 then
basename=string.sub(srcfl,1,extb-1)
extension=string.sub(srcfl,extb,exte)
end
destfl=basename..HTML_EXT
if option.OUTPUT_FILE then
destfl=option.OUTPUT_FILE
end
if srcfl==destfl then
base.error("output filename identical to input filename")
end
end

------------------------------------------------------------------------
-- message display, post-load processing
------------------------------------------------------------------------

function post_load(z)
print([[
HTML plugin module for LuaSrcDiet
]])
print("Exporting: "..srcfl.." -> "..destfl.."\n")
end

------------------------------------------------------------------------
-- post-lexing processing, can work on lexer table output
------------------------------------------------------------------------

function post_lex(_toklist,_seminfolist,_toklnlist)
toklist,seminfolist,toklnlist
=_toklist,_seminfolist,_toklnlist
end

------------------------------------------------------------------------
-- escape the usual suspects for HTML/XML
------------------------------------------------------------------------
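-- e.g. (illustrative) do_entities('a<b&c') returns "a&lt;b&amp;c"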

local function do_entities(z)
local i=1
while i<=#z do
local c=string.sub(z,i,i)
local d=ENTITIES[c]
if d then
c=d
z=string.sub(z,1,i-1)..c..string.sub(z,i+1)
end
i=i+#c
end--while
return z
end

------------------------------------------------------------------------
-- save source code to file
------------------------------------------------------------------------

local function save_file(fname,dat)
local OUTF=io.open(fname,"wb")
if not OUTF then base.error("cannot open \""..fname.."\" for writing")end
local status=OUTF:write(dat)
if not status then base.error("cannot write to \""..fname.."\"")end
OUTF:close()
end

------------------------------------------------------------------------
-- post-parsing processing, gives globalinfo, localinfo
------------------------------------------------------------------------

function post_parse(globalinfo,localinfo)
local html={}
local function add(s)-- html helpers
html[#html+1]=s
end
local function span(class,s)
add('<span class="'..class..'">'..s..'</span>')
end
----------------------------------------------------------------------
for i=1,#globalinfo do-- mark global identifiers as TK_GLOBAL
local obj=globalinfo[i]
local xref=obj.xref
for j=1,#xref do
local p=xref[j]
toklist[p]="TK_GLOBAL"
end
end--for
----------------------------------------------------------------------
for i=1,#localinfo do-- mark local identifiers as TK_LOCAL
local obj=localinfo[i]
local xref=obj.xref
for j=1,#xref do
local p=xref[j]
toklist[p]="TK_LOCAL"
end
end--for
----------------------------------------------------------------------
add(string.format(HEADER,-- header and leading stuff
do_entities(srcfl),
STYLESHEET))
for i=1,#toklist do-- enumerate token list
local tok,info=toklist[i],seminfolist[i]
if tok=="TK_KEYWORD"then
span("keyword",info)
elseif tok=="TK_STRING"or tok=="TK_LSTRING"then
span("string",do_entities(info))
elseif tok=="TK_COMMENT"or tok=="TK_LCOMMENT"then
span("comment",do_entities(info))
elseif tok=="TK_GLOBAL"then
span("global",info)
elseif tok=="TK_LOCAL"then
span("local",info)
elseif tok=="TK_NAME"then
span("name",info)
elseif tok=="TK_NUMBER"then
span("number",info)
elseif tok=="TK_OP"then
span("operator",do_entities(info))
elseif tok~="TK_EOS"then-- TK_EOL, TK_SPACE
add(info)
end
end--for
add(FOOTER)
save_file(destfl,table.concat(html))
option.EXIT=true
end
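
-- for illustration: given the chunk 'local x', the loop above emits
-- the fragment below ('x' is a declared local, so its xref is marked
-- TK_LOCAL by the marking pass above, while the space between the
-- tokens passes through unchanged):
--[[
<span class="keyword">local</span> <span class="local">x</span>
--]]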
--end of inserted module
end

-- preload function for module plugin/sloc
preload["plugin/sloc"]=
function()
--start of inserted module
module"plugin/sloc"

local string=base.require"string"
local table=base.require"table"

------------------------------------------------------------------------
-- initialization
------------------------------------------------------------------------

local option-- local reference to list of options
local srcfl-- source file name

function init(_option,_srcfl,_destfl)
option=_option
option.QUIET=true
srcfl=_srcfl
end

------------------------------------------------------------------------
-- splits a block into a table of lines (minus EOLs)
------------------------------------------------------------------------

local function split(blk)
local lines={}
local i,nblk=1,#blk
while i<=nblk do
local p,q,r,s=string.find(blk,"([\r\n])([\r\n]?)",i)
if not p then
p=nblk+1
end
lines[#lines+1]=string.sub(blk,i,p-1)
i=p+1
if p<nblk and q>p and r~=s then-- handle Lua-style CRLF, LFCR
i=i+1
end
end
return lines
end
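
-- behaviour sketch (commented out, never executed): each of LF, CR,
-- CRLF and LFCR is consumed as a single line break:
--[[
local t=split("a\r\nb\rc\n")-- --> { "a", "b", "c" }
--]]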

------------------------------------------------------------------------
-- post-lexing processing, can work on lexer table output
------------------------------------------------------------------------

function post_lex(toklist,seminfolist,toklnlist)
local lnow,sloc=0,0
local function chk(ln)-- if a new line, count it as an SLOC
if ln>lnow then-- new line # must be > old line #
sloc=sloc+1;lnow=ln
end
end
for i=1,#toklist do-- enumerate over all tokens
local tok,info,ln
=toklist[i],seminfolist[i],toklnlist[i]
--------------------------------------------------------------------
if tok=="TK_KEYWORD"or tok=="TK_NAME"or-- significant
tok=="TK_NUMBER"or tok=="TK_OP"then
chk(ln)
--------------------------------------------------------------------
-- Both TK_STRING and TK_LSTRING may span multiple lines, hence a
-- loop is needed to mark off the lines one by one. Since llex.lua
-- currently returns the line number of the last line of the string,
-- we must subtract to recover the starting line number.
--------------------------------------------------------------------
elseif tok=="TK_STRING"then-- possible multi-line
local t=split(info)
ln=ln-#t+1
for j=1,#t do
chk(ln);ln=ln+1
end
--------------------------------------------------------------------
elseif tok=="TK_LSTRING"then-- possible multi-line
local t=split(info)
ln=ln-#t+1
for j=1,#t do
if t[j]~=""then chk(ln)end
ln=ln+1
end
--------------------------------------------------------------------
-- other tokens are comments or whitespace and are ignored
--------------------------------------------------------------------
end
end--for
base.print(srcfl..": "..sloc)-- display result
option.EXIT=true
end
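
-- worked example: for the two-line chunk
--     local s = [=[
--     hello]=]
-- 'local', 's' and '=' are significant tokens on line 1, so chk(1)
-- counts one SLOC. The TK_LSTRING's seminfo keeps the token's full
-- source text (seminfo is concatenated elsewhere to rebuild the file)
-- and is reported at line 2; split() yields 2 parts, so scanning
-- restarts at line 2-2+1 == 1. Part 1 ("[=[") re-checks the
-- already-counted line 1, part 2 ("hello]=]") lands on line 2 and
-- counts the second SLOC. Total: 2.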
--end of inserted module
end

-- support modules
local llex=require"llex"
local lparser=require"lparser"
local optlex=require"optlex"
local optparser=require"optparser"
local equiv=require"equiv"
local plugin

--[[--------------------------------------------------------------------
-- messages and textual data
----------------------------------------------------------------------]]

local MSG_TITLE=[[
LuaSrcDiet: Puts your Lua 5.1 source code on a diet
Version 0.12.1 (20120407)  Copyright (c) 2012 Kein-Hong Man
The COPYRIGHT file describes the conditions under which this
software may be distributed.
]]

local MSG_USAGE=[[
usage: LuaSrcDiet [options] [filenames]

example:
  >LuaSrcDiet myscript.lua -o myscript_.lua

options:
  -v, --version       prints version information
  -h, --help          prints usage information
  -o <file>           specify file name to write output
  -s <suffix>         suffix for output files (default '_')
  --keep <msg>        keep block comment with <msg> inside
  --plugin <module>   run <module> in plugin/ directory
  -                   stop handling arguments

  (optimization levels)
  --none              all optimizations off (normalizes EOLs only)
  --basic             lexer-based optimizations only
  --maximum           maximize reduction of source

  (informational)
  --quiet             process files quietly
  --read-only         read file and print token stats only
  --dump-lexer        dump raw tokens from lexer to stdout
  --dump-parser       dump variable tracking tables from parser
  --details           extra info (strings, numbers, locals)

features (to disable one, insert the 'no' prefix, e.g. --noopt-comments):
%s
default settings:
%s]]

------------------------------------------------------------------------
-- optimization options, for ease of switching on and off
-- * positive to enable optimization, negative (no) to disable
-- * these options should follow --opt-* and --noopt-* style for now
------------------------------------------------------------------------

local OPTION=[[
--opt-comments,'remove comments and block comments'
--opt-whitespace,'remove whitespace excluding EOLs'
--opt-emptylines,'remove empty lines'
--opt-eols,'all above, plus remove unnecessary EOLs'
--opt-strings,'optimize strings and long strings'
--opt-numbers,'optimize numbers'
--opt-locals,'optimize local variable names'
--opt-entropy,'tries to reduce symbol entropy of locals'
--opt-srcequiv,'insist on source (lexer stream) equivalence'
--opt-binequiv,'insist on binary chunk equivalence'
--opt-experimental,'apply experimental optimizations'
]]

-- preset configuration
local DEFAULT_CONFIG=[[
  --opt-comments --opt-whitespace --opt-emptylines
  --opt-numbers --opt-locals
  --opt-srcequiv --opt-binequiv
]]
-- override configurations
-- * MUST explicitly enable/disable everything for
--   total option replacement
local BASIC_CONFIG=[[
  --opt-comments --opt-whitespace --opt-emptylines
  --noopt-eols --noopt-strings --noopt-numbers
  --noopt-locals --noopt-entropy
  --opt-srcequiv --opt-binequiv
]]
local MAXIMUM_CONFIG=[[
  --opt-comments --opt-whitespace --opt-emptylines
  --opt-eols --opt-strings --opt-numbers
  --opt-locals --opt-entropy
  --opt-srcequiv --opt-binequiv
]]
local NONE_CONFIG=[[
  --noopt-comments --noopt-whitespace --noopt-emptylines
  --noopt-eols --noopt-strings --noopt-numbers
  --noopt-locals --noopt-entropy
  --opt-srcequiv --opt-binequiv
]]

local DEFAULT_SUFFIX="_"-- default suffix for file renaming
local PLUGIN_SUFFIX="plugin/"-- relative location of plugins

--[[--------------------------------------------------------------------
-- startup and initialize option list handling
----------------------------------------------------------------------]]

-- simple error message handler; change to error if traceback wanted
local function die(msg)
print("LuaSrcDiet (error): "..msg);os.exit(1)
end
--die = error--DEBUG

if not match(_VERSION,"5%.1")then-- sanity check (dot escaped; match takes no plain flag)
die("requires Lua 5.1 to run")
end

------------------------------------------------------------------------
-- prepare the text listing the optimizations, build the option lookup table
------------------------------------------------------------------------

local MSG_OPTIONS=""
do
local WIDTH=24
local o={}
for op,desc in gmatch(OPTION,"%s*([^,]+),'([^']+)'")do
local msg="  "..op
msg=msg..string.rep(" ",WIDTH-#msg)..desc.."\n"
MSG_OPTIONS=MSG_OPTIONS..msg
o[op]=true
o["--no"..sub(op,3)]=true
end
OPTION=o-- replace OPTION with lookup table
end
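
-- after this block OPTION is a lookup table keyed on both polarities
-- of each switch, e.g. (commented out, never executed):
--[[
assert(OPTION["--opt-comments"]==true)
assert(OPTION["--noopt-comments"]==true)
--]]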

MSG_USAGE=string.format(MSG_USAGE,MSG_OPTIONS,DEFAULT_CONFIG)

if p_embedded then-- embedded plugins
local EMBED_INFO="\nembedded plugins:\n"
for i=1,#p_embedded do
local p=p_embedded[i]
EMBED_INFO=EMBED_INFO.."  "..plugin_info[p].."\n"
end
MSG_USAGE=MSG_USAGE..EMBED_INFO
end

------------------------------------------------------------------------
-- global variable initialization, option set handling
------------------------------------------------------------------------

local suffix=DEFAULT_SUFFIX-- file suffix
local option={}-- program options
local stat_c,stat_l-- statistics tables

-- function to set program options from a text list of option switches
-- note: additional forced settings for --opt-eols are applied in optlex.lua
local function set_options(CONFIG)
for op in gmatch(CONFIG,"(%-%-%S+)")do
if sub(op,3,4)=="no"and-- handle negative options
OPTION["--"..sub(op,5)]then
option[sub(op,5)]=false
else
option[sub(op,3)]=true
end
end
end
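
-- sketch (commented out, never executed): a 'no' switch clears the
-- matching positive flag in 'option':
--[[
set_options("--opt-comments --noopt-strings")
assert(option["opt-comments"]==true)
assert(option["opt-strings"]==false)
--]]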

--[[--------------------------------------------------------------------
-- support functions
----------------------------------------------------------------------]]

-- list of token types; parser-significant types come first, with
-- indices up to TTYPE_GRAMMAR, while the rest are not used by the
-- parser; arranged for stats display
local TTYPES={
"TK_KEYWORD","TK_NAME","TK_NUMBER",-- grammar
"TK_STRING","TK_LSTRING","TK_OP",
"TK_EOS",
"TK_COMMENT","TK_LCOMMENT",-- non-grammar
"TK_EOL","TK_SPACE",
}
local TTYPE_GRAMMAR=7

local EOLTYPES={-- EOL names for token dump
["\n"]="LF",["\r"]="CR",
["\n\r"]="LFCR",["\r\n"]="CRLF",
}

------------------------------------------------------------------------
-- read source code from file
------------------------------------------------------------------------

local function load_file(fname)
local INF=io.open(fname,"rb")
if not INF then die('cannot open "'..fname..'" for reading')end
local dat=INF:read("*a")
if not dat then die('cannot read from "'..fname..'"')end
INF:close()
return dat
end

------------------------------------------------------------------------
-- save source code to file
------------------------------------------------------------------------

local function save_file(fname,dat)
local OUTF=io.open(fname,"wb")
if not OUTF then die('cannot open "'..fname..'" for writing')end
local status=OUTF:write(dat)
if not status then die('cannot write to "'..fname..'"')end
OUTF:close()
end

------------------------------------------------------------------------
-- functions to deal with statistics
------------------------------------------------------------------------

-- initialize statistics table
local function stat_init()
stat_c,stat_l={},{}
for i=1,#TTYPES do
local ttype=TTYPES[i]
stat_c[ttype],stat_l[ttype]=0,0
end
end

-- add a token to statistics table
local function stat_add(tok,seminfo)
stat_c[tok]=stat_c[tok]+1
stat_l[tok]=stat_l[tok]+#seminfo
end

-- do totals for statistics table, return average table
local function stat_calc()
local function avg(c,l)-- safe average function
if c==0 then return 0 end
return l/c
end
local stat_a={}
local c,l=0,0
for i=1,TTYPE_GRAMMAR do-- total grammar tokens
local ttype=TTYPES[i]
c=c+stat_c[ttype];l=l+stat_l[ttype]
end
stat_c.TOTAL_TOK,stat_l.TOTAL_TOK=c,l
stat_a.TOTAL_TOK=avg(c,l)
c,l=0,0
for i=1,#TTYPES do-- total all tokens
local ttype=TTYPES[i]
c=c+stat_c[ttype];l=l+stat_l[ttype]
stat_a[ttype]=avg(stat_c[ttype],stat_l[ttype])
end
stat_c.TOTAL_ALL,stat_l.TOTAL_ALL=c,l
stat_a.TOTAL_ALL=avg(c,l)
return stat_a
end
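
-- worked example: if a source lexes to 2 TK_NAME tokens of 7 bytes in
-- total, stat_a.TK_NAME is 7/2 == 3.5; a token type with a count of
-- zero yields an average of 0 rather than a division by zero.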

--[[--------------------------------------------------------------------
-- main tasks
----------------------------------------------------------------------]]

------------------------------------------------------------------------
-- a simple token dumper, minimal translation of seminfo data
------------------------------------------------------------------------

local function dump_tokens(srcfl)
--------------------------------------------------------------------
-- load file and process source input into tokens
--------------------------------------------------------------------
local z=load_file(srcfl)
llex.init(z)
llex.llex()
local toklist,seminfolist=llex.tok,llex.seminfo
--------------------------------------------------------------------
-- display output
--------------------------------------------------------------------
for i=1,#toklist do
local tok,seminfo=toklist[i],seminfolist[i]
if tok=="TK_OP"and string.byte(seminfo)<32 then
seminfo="("..string.byte(seminfo)..")"
elseif tok=="TK_EOL"then
seminfo=EOLTYPES[seminfo]
else
seminfo="'"..seminfo.."'"
end
print(tok.." "..seminfo)
end--for
end
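
-- illustrative --dump-lexer output for the chunk 'local x=1' (followed
-- by a final TK_EOS line; exact spacing follows the print call above):
--[[
TK_KEYWORD 'local'
TK_SPACE ' '
TK_NAME 'x'
TK_OP '='
TK_NUMBER '1'
--]]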

------------------------------------------------------------------------
-- parser dump; dump globalinfo and localinfo tables
------------------------------------------------------------------------

local function dump_parser(srcfl)
local print=print
--------------------------------------------------------------------
-- load file and process source input into tokens
--------------------------------------------------------------------
local z=load_file(srcfl)
llex.init(z)
llex.llex()
local toklist,seminfolist,toklnlist
=llex.tok,llex.seminfo,llex.tokln
--------------------------------------------------------------------
-- do parser optimization here
--------------------------------------------------------------------
lparser.init(toklist,seminfolist,toklnlist)
local xinfo=lparser.parser()
local globalinfo,localinfo=
xinfo.globalinfo,xinfo.localinfo
--------------------------------------------------------------------
-- display output
--------------------------------------------------------------------
local hl=string.rep("-",72)
print("*** Local/Global Variable Tracker Tables ***")
print(hl.."\n GLOBALS\n"..hl)
-- global tables have a list of xref numbers only
for i=1,#globalinfo do
local obj=globalinfo[i]
local msg="("..i..") '"..obj.name.."' -> "
local xref=obj.xref
for j=1,#xref do msg=msg..xref[j].." "end
print(msg)
end
-- local tables have xref numbers plus a few specially-named
-- entries: decl (declaration xref), act (activation xref),
-- rem (removal xref)
print(hl.."\n LOCALS (decl=declared act=activated rem=removed)\n"..hl)
for i=1,#localinfo do
local obj=localinfo[i]
local msg="("..i..") '"..obj.name.."' decl:"..obj.decl..
" act:"..obj.act.." rem:"..obj.rem
if obj.isself then
msg=msg.." isself"
end
msg=msg.." -> "
local xref=obj.xref
for j=1,#xref do msg=msg..xref[j].." "end
print(msg)
end
print(hl.."\n")
end

------------------------------------------------------------------------
-- reads source file(s) and reports some statistics
------------------------------------------------------------------------

local function read_only(srcfl)
local print=print
--------------------------------------------------------------------
-- load file and process source input into tokens
--------------------------------------------------------------------
local z=load_file(srcfl)
llex.init(z)
llex.llex()
local toklist,seminfolist=llex.tok,llex.seminfo
print(MSG_TITLE)
print("Statistics for: "..srcfl.."\n")
--------------------------------------------------------------------
-- collect statistics
--------------------------------------------------------------------
stat_init()
for i=1,#toklist do
local tok,seminfo=toklist[i],seminfolist[i]
stat_add(tok,seminfo)
end--for
local stat_a=stat_calc()
--------------------------------------------------------------------
-- display output
--------------------------------------------------------------------
local fmt=string.format
local function figures(tt)
return stat_c[tt],stat_l[tt],stat_a[tt]
end
local tabf1,tabf2="%-16s%8s%8s%10s","%-16s%8d%8d%10.2f"
local hl=string.rep("-",42)
print(fmt(tabf1,"Lexical","Input","Input","Input"))
print(fmt(tabf1,"Elements","Count","Bytes","Average"))
print(hl)
for i=1,#TTYPES do
local ttype=TTYPES[i]
print(fmt(tabf2,ttype,figures(ttype)))
if ttype=="TK_EOS"then print(hl)end
end
print(hl)
print(fmt(tabf2,"Total Elements",figures("TOTAL_ALL")))
print(hl)
print(fmt(tabf2,"Total Tokens",figures("TOTAL_TOK")))
print(hl.."\n")
end

------------------------------------------------------------------------
-- process source file(s), write output and report some statistics
------------------------------------------------------------------------

local function process_file(srcfl,destfl)
local function print(...)-- handle quiet option
if option.QUIET then return end
_G.print(...)
end
if plugin and plugin.init then-- plugin init
option.EXIT=false
plugin.init(option,srcfl,destfl)
if option.EXIT then return end
end
print(MSG_TITLE)-- title message
--------------------------------------------------------------------
-- load file and process source input into tokens
--------------------------------------------------------------------
local z=load_file(srcfl)
if plugin and plugin.post_load then-- plugin post-load
z=plugin.post_load(z)or z
if option.EXIT then return end
end
llex.init(z)
llex.llex()
local toklist,seminfolist,toklnlist
=llex.tok,llex.seminfo,llex.tokln
if plugin and plugin.post_lex then-- plugin post-lex
plugin.post_lex(toklist,seminfolist,toklnlist)
if option.EXIT then return end
end
--------------------------------------------------------------------
-- collect 'before' statistics
--------------------------------------------------------------------
stat_init()
for i=1,#toklist do
local tok,seminfo=toklist[i],seminfolist[i]
stat_add(tok,seminfo)
end--for
local stat1_a=stat_calc()
local stat1_c,stat1_l=stat_c,stat_l
--------------------------------------------------------------------
-- do parser optimization here
--------------------------------------------------------------------
optparser.print=print-- hack
lparser.init(toklist,seminfolist,toklnlist)
local xinfo=lparser.parser()
if plugin and plugin.post_parse then-- plugin post-parse
plugin.post_parse(xinfo.globalinfo,xinfo.localinfo)
if option.EXIT then return end
end
optparser.optimize(option,toklist,seminfolist,xinfo)
if plugin and plugin.post_optparse then-- plugin post-optparse
plugin.post_optparse()
if option.EXIT then return end
end
--------------------------------------------------------------------
-- do lexer optimization here, save output file
--------------------------------------------------------------------
local warn=optlex.warn-- use this as a general warning lookup
optlex.print=print-- hack
toklist,seminfolist,toklnlist
=optlex.optimize(option,toklist,seminfolist,toklnlist)
if plugin and plugin.post_optlex then-- plugin post-optlex
plugin.post_optlex(toklist,seminfolist,toklnlist)
if option.EXIT then return end
end
local dat=table.concat(seminfolist)
-- depending on the options selected, embedded EOLs in long strings and
-- long comments may not have been normalized to \n; if so, tack on a warning
if string.find(dat,"\r\n",1,1)or
string.find(dat,"\n\r",1,1)then
warn.MIXEDEOL=true
end
--------------------------------------------------------------------
-- test source and binary chunk equivalence
--------------------------------------------------------------------
equiv.init(option,llex,warn)
equiv.source(z,dat)
equiv.binary(z,dat)
local smsg="before and after lexer streams are NOT equivalent!"
local bmsg="before and after binary chunks are NOT equivalent!"
-- for reporting: die if the corresponding option was selected, else just warn
if warn.SRC_EQUIV then
if option["opt-srcequiv"]then die(smsg)end
else
print("*** SRCEQUIV: token streams are sort of equivalent")
if option["opt-locals"]then
print("(but no identifier comparisons since --opt-locals enabled)")
end
print()
end
if warn.BIN_EQUIV then
if option["opt-binequiv"]then die(bmsg)end
else
print("*** BINEQUIV: binary chunks are sort of equivalent")
print()
end
--------------------------------------------------------------------
-- save optimized source stream to output file
--------------------------------------------------------------------
save_file(destfl,dat)
--------------------------------------------------------------------
-- collect 'after' statistics
--------------------------------------------------------------------
stat_init()
for i=1,#toklist do
local tok,seminfo=toklist[i],seminfolist[i]
stat_add(tok,seminfo)
end--for
local stat_a=stat_calc()
--------------------------------------------------------------------
-- display output
--------------------------------------------------------------------
print("Statistics for: "..srcfl.." -> "..destfl.."\n")
local fmt=string.format
local function figures(tt)
return stat1_c[tt],stat1_l[tt],stat1_a[tt],
stat_c[tt],stat_l[tt],stat_a[tt]
end
local tabf1,tabf2="%-16s%8s%8s%10s%8s%8s%10s",
"%-16s%8d%8d%10.2f%8d%8d%10.2f"
local hl=string.rep("-",68)
print("*** lexer-based optimizations summary ***\n"..hl)
print(fmt(tabf1,"Lexical",
"Input","Input","Input",
"Output","Output","Output"))
print(fmt(tabf1,"Elements",
"Count","Bytes","Average",
"Count","Bytes","Average"))
print(hl)
for i=1,#TTYPES do
local ttype=TTYPES[i]
print(fmt(tabf2,ttype,figures(ttype)))
if ttype=="TK_EOS"then print(hl)end
end
print(hl)
print(fmt(tabf2,"Total Elements",figures("TOTAL_ALL")))
print(hl)
print(fmt(tabf2,"Total Tokens",figures("TOTAL_TOK")))
print(hl)
--------------------------------------------------------------------
-- report warning flags from optimizing process
--------------------------------------------------------------------
if warn.LSTRING then
print("* WARNING: "..warn.LSTRING)
elseif warn.MIXEDEOL then
print("* WARNING: ".."output still contains some CRLF or LFCR line endings")
elseif warn.SRC_EQUIV then
print("* WARNING: "..smsg)
elseif warn.BIN_EQUIV then
print("* WARNING: "..bmsg)
end
print()
end
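
-- in summary, process_file() runs the pipeline:
--   load_file -> llex -> 'before' stats -> lparser -> optparser
--   -> optlex -> equiv checks -> save_file -> 'after' stats
-- with the plugin hooks (post_load, post_lex, post_parse,
-- post_optparse, post_optlex) interleaved at each stage; any hook may
-- set option.EXIT to stop further processing of the current file.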

--[[--------------------------------------------------------------------
-- main functions
----------------------------------------------------------------------]]

local arg={...}-- program arguments
local fspec={}
set_options(DEFAULT_CONFIG)-- set to default options at beginning

------------------------------------------------------------------------
-- per-file handling, ship off to tasks
------------------------------------------------------------------------

local function do_files(fspec)
for i=1,#fspec do
local srcfl=fspec[i]
local destfl
------------------------------------------------------------------
-- find and replace extension for filenames
------------------------------------------------------------------
local extb,exte=string.find(srcfl,"%.[^%.%\\%/]*$")
local basename,extension=srcfl,""
if extb and extb>1 then
basename=sub(srcfl,1,extb-1)
extension=sub(srcfl,extb,exte)
end
destfl=basename..suffix..extension
if#fspec==1 and option.OUTPUT_FILE then
destfl=option.OUTPUT_FILE
end
if srcfl==destfl then
die("output filename identical to input filename")
end
------------------------------------------------------------------
-- perform requested operations
------------------------------------------------------------------
if option.DUMP_LEXER then
dump_tokens(srcfl)
elseif option.DUMP_PARSER then
dump_parser(srcfl)
elseif option.READ_ONLY then
read_only(srcfl)
else
process_file(srcfl,destfl)
end
end--for
end
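
-- sketch (comments only): with the default suffix "_" the destination
-- name keeps the extension; -o overrides it when a single source file
-- is given:
--[[
-- "myscript.lua" -> "myscript_.lua"
-- "no_extension" -> "no_extension_"
--]]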

------------------------------------------------------------------------
-- main function (entry point is after this definition)
------------------------------------------------------------------------

local function main()
local argn,i=#arg,1
if argn==0 then
option.HELP=true
end
--------------------------------------------------------------------
-- handle arguments
--------------------------------------------------------------------
while i<=argn do
local o,p=arg[i],arg[i+1]
local dash=match(o,"^%-%-?")
if dash=="-"then-- single-dash options
if o=="-h"then
option.HELP=true;break
elseif o=="-v"then
option.VERSION=true;break
elseif o=="-s"then
if not p then die("-s option needs suffix specification")end
suffix=p
i=i+1
elseif o=="-o"then
if not p then die("-o option needs a file name")end
option.OUTPUT_FILE=p
i=i+1
elseif o=="-"then
break-- ignore rest of args
else
die("unrecognized option "..o)
end
elseif dash=="--"then-- double-dash options
if o=="--help"then
option.HELP=true;break
elseif o=="--version"then
option.VERSION=true;break
elseif o=="--keep"then
if not p then die("--keep option needs a string to match for")end
option.KEEP=p
i=i+1
elseif o=="--plugin"then
if not p then die("--plugin option needs a module name")end
if option.PLUGIN then die("only one plugin can be specified")end
option.PLUGIN=p
plugin=require(PLUGIN_SUFFIX..p)
i=i+1
elseif o=="--quiet"then
option.QUIET=true
elseif o=="--read-only"then
option.READ_ONLY=true
elseif o=="--basic"then
set_options(BASIC_CONFIG)
elseif o=="--maximum"then
set_options(MAXIMUM_CONFIG)
elseif o=="--none"then
set_options(NONE_CONFIG)
elseif o=="--dump-lexer"then
option.DUMP_LEXER=true
elseif o=="--dump-parser"then
option.DUMP_PARSER=true
elseif o=="--details"then
option.DETAILS=true
elseif OPTION[o]then-- lookup optimization options
set_options(o)
else
die("unrecognized option "..o)
end
else
fspec[#fspec+1]=o-- potential filename
end
i=i+1
end--while
if option.HELP then
print(MSG_TITLE..MSG_USAGE);return true
elseif option.VERSION then
print(MSG_TITLE);return true
end
if#fspec>0 then
if#fspec>1 and option.OUTPUT_FILE then
die("with -o, only one source file can be specified")
end
do_files(fspec)
return true
else
die("nothing to do!")
end
end

-- entry point -> main() -> do_files()
if not main()then
die("Please run with option -h or --help for usage information")
end

-- end of script
